Openinference Semantic Conventions

openinference/spec/semantic_conventions.md at main · Arize-ai/openinference (GitHub)

When sending traces, you may want to set your own custom attributes on each span. Semantic conventions are special attribute keys or values that carry special meaning. In Arize, certain attribute keys are displayed more prominently, in addition to showing up in the attributes tab like other keys.

Types of Attributes

class SpanAttributes:
    """Semantic-convention attribute keys that are set on a span."""

    OUTPUT_VALUE = "output.value"
    """The output value of an operation."""
    OUTPUT_MIME_TYPE = "output.mime_type"
    """
    MIME type of output.value. Plain text is assumed when this is not set.
    For JSON, output.value holds a string that represents a JSON object.
    """
    INPUT_VALUE = "input.value"
    """The input value to an operation."""
    INPUT_MIME_TYPE = "input.mime_type"
    """
    MIME type of input.value. Plain text is assumed when this is not set.
    For JSON, input.value holds a string that represents a JSON object.
    """

    EMBEDDING_EMBEDDINGS = "embedding.embeddings"
    """List of embedding objects, each with its vector and the text it represents."""
    EMBEDDING_MODEL_NAME = "embedding.model_name"
    """Name of the embedding model."""

    LLM_FUNCTION_CALL = "llm.function_call"
    """
    Details of a function call, for models and APIs that support function
    calling: e.g. the function name and the arguments passed to it.
    """
    LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters"
    """Parameters the LLM or API was invoked with, e.g. model name, temperature."""
    LLM_INPUT_MESSAGES = "llm.input_messages"
    """Messages sent to a chat API."""
    LLM_OUTPUT_MESSAGES = "llm.output_messages"
    """Messages returned by a chat API."""
    LLM_MODEL_NAME = "llm.model_name"
    """Name of the model in use."""
    LLM_PROMPTS = "llm.prompts"
    """Prompts sent to a completions API."""
    LLM_PROMPT_TEMPLATE = "llm.prompt_template.template"
    """The prompt template, written as a Python f-string."""
    LLM_PROMPT_TEMPLATE_VARIABLES = "llm.prompt_template.variables"
    """The input variables applied to the prompt template."""
    LLM_PROMPT_TEMPLATE_VERSION = "llm.prompt_template.version"
    """Version of the prompt template in use."""
    LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
    """Token count of the prompt."""
    LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
    """Token count of the completion."""
    LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
    """Token count of prompt and completion combined."""

    TOOL_NAME = "tool.name"
    """Name of the tool in use."""
    TOOL_DESCRIPTION = "tool.description"
    """What the tool is for; typically used when selecting a tool."""
    TOOL_PARAMETERS = "tool.parameters"
    """
    The tool's parameters, represented as a dictionary JSON string, e.g.
    see https://platform.openai.com/docs/guides/gpt/function-calling
    """

    RETRIEVAL_DOCUMENTS = "retrieval.documents"
    """List of retrieved documents."""

    METADATA = "metadata"
    """
    User-defined key-value pairs attached to the span.
    For example, LangChain stores a chain's user-defined attributes as metadata.
    """

    TAG_TAGS = "tag.tags"
    """Custom categorical tags for the span."""

    OPENINFERENCE_SPAN_KIND = "openinference.span.kind"
    """The kind of span, e.g. CHAIN, LLM, RETRIEVER, RERANKER."""

    SESSION_ID = "session.id"
    """Identifier of the session."""
    USER_ID = "user.id"
    """Identifier of the user."""

class MessageAttributes:
    """Attribute keys describing a single message generated by an LLM."""

    MESSAGE_ROLE = "message.role"
    """Role attached to the message, e.g. "user", "agent", "function"."""
    MESSAGE_CONTENT = "message.content"
    """Content of the message to the LLM."""
    MESSAGE_NAME = "message.name"
    """
    Name of the message; often identifies the function that was used
    to generate it.
    """
    MESSAGE_TOOL_CALLS = "message.tool_calls"
    """Tool calls (such as function calls) generated by the model."""
    MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name"
    """
    Function name that is part of the message list. Populated for role
    'function' or 'agent' to identify the function that was called during
    the execution of a tool.
    """
    MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json"
    """JSON string of the arguments passed to the function during a function call."""

class DocumentAttributes:
    """Attribute keys describing a single document."""

    DOCUMENT_ID = "document.id"
    """Identifier of the document."""
    DOCUMENT_SCORE = "document.score"
    """Score assigned to the document."""
    DOCUMENT_CONTENT = "document.content"
    """Content of the document."""
    DOCUMENT_METADATA = "document.metadata"
    """
    Metadata of the document, represented as a dictionary JSON string,
    e.g. `"{ 'title': 'foo' }"`
    """

class RerankerAttributes:
    """Attribute keys describing a reranker invocation."""

    RERANKER_INPUT_DOCUMENTS = "reranker.input_documents"
    """Documents given to the reranker as input."""
    RERANKER_OUTPUT_DOCUMENTS = "reranker.output_documents"
    """Documents produced by the reranker as output."""
    RERANKER_QUERY = "reranker.query"
    """Query string given to the reranker."""
    RERANKER_MODEL_NAME = "reranker.model_name"
    """Name of the reranker model."""
    RERANKER_TOP_K = "reranker.top_k"
    """The reranker's top-K parameter."""

class EmbeddingAttributes:
    """Attribute keys describing a single embedding."""

    EMBEDDING_TEXT = "embedding.text"
    """Text that the embedding represents."""
    EMBEDDING_VECTOR = "embedding.vector"
    """The vector of the embedding."""

class ToolCallAttributes:
    """Attribute keys describing a single tool call."""

    TOOL_CALL_FUNCTION_NAME = "tool_call.function.name"
    """Name of the function being called during a tool call."""
    TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments"
    """JSON string of the arguments passed to the function during a tool call."""

For a complete guide to Python semantic conventions, refer to the following resource on GitHub: OpenInference Python Semantic Conventions.

You can also see example values for each attribute below, or refer to the README for our semantic conventions.

Attribute

Type

Example

Description

document.content

String

"This is a sample document content."

The content of a retrieved document

document.id

String/Integer

"1234" or 1

Unique identifier for a document

document.metadata

JSON String

"{'author': 'John Doe', 'date': '2023-09-09'}"

Metadata associated with a document

document.score

Float

0.98

Score representing the relevance of a document

embedding.embeddings

List of objects†

[{"embedding.vector": [...], "embedding.text": "hello"}]

List of embedding objects including text and vector data

embedding.model_name

String

"BERT-base"

Name of the embedding model used

embedding.text

String

"hello world"

The text represented in the embedding

embedding.vector

List of floats

[0.123, 0.456, ...]

The embedding vector consisting of a list of floats

exception.escaped

Boolean

true

Indicator if the exception has escaped the span's scope

exception.message

String

"Null value encountered"

Detailed message describing the exception

exception.stacktrace

String

"at app.main(app.java:16)"

The stack trace of the exception

exception.type

String

"NullPointerException"

The type of exception that was thrown

input.mime_type

String

"text/plain" or "application/json"

MIME type representing the format of input.value

input.value

String

"{'query': 'What is the weather today?'}"

The input value to an operation

llm.function_call

JSON String

"{function_name: 'add', args: [1, 2]}"

Object recording details of a function call in models or APIs

llm.input_messages

List of objects†

[{"message.role": "user", "message.content": "hello"}]

List of messages sent to the LLM in a chat API request

llm.invocation_parameters

JSON string

"{model_name: 'gpt-3', temperature: 0.7}"

Parameters used during the invocation of an LLM or API

llm.model_name

String

"gpt-3.5-turbo"

The name of the language model being utilized

llm.output_messages

List of objects†

[{"message.role": "user", "message.content": "hello"}]

List of messages received from the LLM in a chat API request

llm.prompt_template.template

String

"Weather forecast for {city} on {date}"

Template used to generate prompts as Python f-strings

llm.prompt_template.variables

JSON String

{ context: "<context from retrieval>", subject: "math" }

JSON of key value pairs applied to the prompt template

llm.prompt_template.version

String

"v1.0"

The version of the prompt template

llm.token_count.completion

Integer

15

The number of tokens in the completion

llm.token_count.prompt

Integer

5

The number of tokens in the prompt

llm.token_count.total

Integer

20

Total number of tokens, including prompt and completion

message.content

String

"What's the weather today?"

The content of a message in a chat

message.function_call_arguments_json

JSON String

"{ 'x': 2 }"

The arguments to the function call in JSON

message.function_call_name

String

"multiply" or "subtract"

Function call function name

message.role

String

"user" or "system"

Role of the entity in a message (e.g., user, system)

message.tool_calls

List of objects†

[{"tool_call.function.name": "get_current_weather"}]

List of tool calls (e.g. function calls) generated by the LLM

metadata

JSON String

"{'author': 'John Doe', 'date': '2023-09-09'}"

Metadata associated with a span

openinference.span.kind

String

"CHAIN"

The kind of span (e.g., CHAIN, LLM, RETRIEVER, RERANKER)

output.mime_type

String

"text/plain" or "application/json"

MIME type representing the format of output.value

output.value

String

"Hello, World!"

The output value of an operation

reranker.input_documents

List of objects†

[{"document.id": "1", "document.score": 0.9, "document.content": "..."}]

List of documents as input to the reranker

reranker.model_name

String

"cross-encoder/ms-marco-MiniLM-L-12-v2"

Model name of the reranker

reranker.output_documents

List of objects†

[{"document.id": "1", "document.score": 0.9, "document.content": "..."}]

List of documents outputted by the reranker

reranker.query

String

"How to format timestamp?"

Query parameter of the reranker

reranker.top_k

Integer

3

Top K parameter of the reranker

retrieval.documents

List of objects†

[{"document.id": "1", "document.score": 0.9, "document.content": "..."}]

List of retrieved documents

session.id

String

"26bcd3d2-cad2-443d-a23c-625e47f3324a"

Unique identifier for a session

tag.tags

List of strings

["shopping", "travel"]

List of tags to give the span a category

tool.description

String

"An API to get weather data."

Description of the tool's purpose and functionality

tool.name

String

"WeatherAPI"

The name of the tool being utilized

tool.parameters

JSON string

"{ 'a': 'int' }"

The parameters definition for invoking the tool

tool_call.function.arguments

JSON string

"{'city': 'London'}"

The arguments for the function being invoked by a tool call

tool_call.function.name

String

"get_current_weather"

The name of the function being invoked by a tool call

user.id

String

"9328ae73-7141-4f45-a044-8e06192aa465"

Unique identifier for a user

Using semantic conventions

Here is an example implementation of a semantic convention. Simply treat them as a string when setting an attribute on a span:

% pip install openinference-semantic-conventions

from openinference.semconv.trace import (
    SpanAttributes,
    OpenInferenceSpanKindValues,
)

def chat(message: str):
    """Example: open an LLM span and record `message` as its input value.

    Relies on a `tracer` object that is already available in scope.
    """
    with tracer.start_as_current_span("an_llm_span") as llm_span:
        # The semantic-convention constants are plain strings, so the call
        # below is the same as:
        #     llm_span.set_attribute("openinference.span.kind", "LLM")
        span_kind = OpenInferenceSpanKindValues.LLM.value
        llm_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND, span_kind)

        # Record the incoming message as the span's input.
        llm_span.set_attribute(SpanAttributes.INPUT_VALUE, message)

The reference for TypeScript semantic conventions can be found here:

npm install @arizeai/openinference-semantic-conventions

import { Span } from "@opentelemetry/api";
import {
    SemanticConventions,
    OpenInferenceSpanKind,
} from "@arizeai/openinference-semantic-conventions";

/**
 * Example: record a chat call as an LLM span tagged with OpenInference
 * semantic-convention attributes.
 *
 * Assumes `tracer` and `do_work` are defined elsewhere in the application.
 *
 * @param message - The incoming message; stored on the span as its input value.
 */
export function chat(message: string) {
    // Create a span. A span must be closed.
    tracer.startActiveSpan(
        "an_llm_span",
        (span: Span) => {
            try {
                // Set the type of span:
                span.setAttributes({
                    [SemanticConventions.OPENINFERENCE_SPAN_KIND]: OpenInferenceSpanKind.LLM,
                });

                // Same as:
                // span.setAttributes({
                //     ["openinference.span.kind"]: "LLM",
                // });

                // Set the input
                span.setAttributes({
                    [SemanticConventions.INPUT_VALUE]: message,
                });
                do_work(message);
            } finally {
                // End the span even if do_work throws, so it is always closed.
                span.end();
            }
        }
    );
}

Transforming messages into OpenTelemetry span attributes

To get a list of objects exported as OpenTelemetry span attributes, flattening of the list is necessary.

If the objects are further nested, flattening should continue until the attribute values are either simple values, i.e. bool, str, bytes, int, float or simple lists, i.e. List[bool], List[str], List[bytes], List[int], List[float].

You can use the example code below to do so:

# List of messages from OpenAI or another LLM provider
messages = [
    {"message.role": "user", "message.content": "hello"},
    {"message.role": "assistant", "message.content": "hi"},
]

# Assumes you already have a span object created.
# A list of objects has to be flattened to be exported as span attributes,
# so each message field becomes its own indexed key under the
# llm.input_messages convention, e.g. "llm.input_messages.0.message.role".
for i, obj in enumerate(messages):
    for key, value in obj.items():
        span.set_attribute(f"llm.input_messages.{i}.{key}", value)

// List of messages from OpenAI or another LLM provider
const messages = [
  { "message.role": "user", "message.content": "hello" },
  { "message.role": "assistant", "message.content": "hi" },
];

// Assumes you already have a span object created.
// A list of objects has to be flattened to be exported as span attributes,
// so each message field becomes its own indexed key under the
// llm.input_messages convention, e.g. "llm.input_messages.0.message.role".
for (const [i, obj] of messages.entries()) {
  for (const [key, value] of Object.entries(obj)) {
    span.setAttribute(`llm.input_messages.${i}.${key}`, value);
  }
}

Examples

Trace inputs and outputs

Trace function calls

Add metadata

Trace prompt templates & variables

Last updated 1 month ago

Was this helpful?