Log, Query, and Update Traces
Log, query, and update LLM traces programmatically. Upload bulk traces or update evaluations and annotations after the fact.

Key Capabilities

  • Bulk upload traces from offline processing
  • Update evaluations asynchronously (LLM-as-judge patterns)
  • Add human feedback and annotations
  • Attach custom metadata for filtering and analysis
  • Export spans for offline analysis

Log Spans

Upload traces in bulk from offline processing or batch evaluation.
import pandas as pd

# Build the span records as plain dicts first, then convert to a DataFrame.
# Column names follow the flattened "context.*" / "attributes.*" convention.
span_records = [
    {
        "context.span_id": "span-1",
        "context.trace_id": "trace-1",
        "name": "llm_call",
        "span_kind": "LLM",
        "start_time": "2024-01-15T10:00:00Z",
        "end_time": "2024-01-15T10:00:02Z",
        "attributes.llm.model_name": "gpt-4",
        "attributes.llm.input_messages": [...],
        "attributes.llm.output_messages": [...],
    },
]
spans_df = pd.DataFrame(span_records)

# Optional: evaluation rows, keyed to their span via context.span_id.
eval_records = [
    {
        "context.span_id": "span-1",
        "name": "Correctness",
        "label": "correct",
        "score": 1.0,
    },
]
evals_df = pd.DataFrame(eval_records)

# Log spans, attaching the optional evaluations in the same request.
log_kwargs = {
    "space_id": "your-space-id",
    "project_name": "my-llm-app",
    "dataframe": spans_df,
    "evals_dataframe": evals_df,  # Optional
}
response = client.spans.log(**log_kwargs)

print(f"Logged {response.record_count} spans")

Log Spans Only

# Spans can also be logged on their own — the evals_dataframe argument
# is optional and simply omitted here.
client.spans.log(
    dataframe=spans_df,
    project_name="my-llm-app",
    space_id="your-space-id",
)

Update Evaluations

Add or update evaluations for existing spans (useful for LLM-as-judge patterns).
import pandas as pd

# One row per (span, evaluation name); the "explanation" field is optional
# per row, so rows without it get a missing value in that column.
relevance_rows = [
    {
        "context.span_id": "span-1",
        "name": "Relevance",
        "label": "relevant",
        "score": 0.95,
        "explanation": "The response directly answers the question.",
    },
    {
        "context.span_id": "span-2",
        "name": "Relevance",
        "label": "not_relevant",
        "score": 0.2,
    },
]
evals_df = pd.DataFrame(relevance_rows)

# Apply the evaluation rows to already-logged spans, matched on the
# context.span_id column of evals_df.
response = client.spans.update_evaluations(
    space_id="your-space-id",
    project_name="my-llm-app",
    dataframe=evals_df,
)

# record_count: how many evaluation rows were applied.
print(f"Updated {response.record_count} evaluations")

Batch Evaluation Pattern

# Run async LLM evaluations on existing traces
async def evaluate_traces():
    """Score recent traces with an LLM judge and upload the results.

    Relies on ``fetch_recent_traces``, ``llm_judge``, ``pd``, and
    ``client`` being defined elsewhere in the surrounding application.
    """
    # Fetch traces to evaluate
    traces = fetch_recent_traces()

    # Run LLM-as-judge evaluations
    eval_results = []
    for trace in traces:
        # NOTE(review): each trace is assumed to expose a span_id
        # attribute — confirm against the fetch helper's return type.
        score = await llm_judge.evaluate(trace)
        eval_results.append({
            "context.span_id": trace.span_id,
            "name": "Quality",
            "score": score,
        })

    # Upload evaluations
    evals_df = pd.DataFrame(eval_results)
    client.spans.update_evaluations(
        space_id="your-space-id",
        project_name="my-llm-app",
        dataframe=evals_df,
    )

Update Annotations

Add human feedback and annotations to spans.
import pandas as pd

# Human-reviewer annotation rows. The "metadata" field carries arbitrary
# extra context (here, which reviewer verified the span).
annotation_rows = [
    {
        "context.span_id": "span-1",
        "annotator_kind": "HUMAN",
        "label": "correct",
        "score": 1.0,
        "explanation": "Verified by human reviewer",
        "metadata": {"reviewer_id": "user-123"},
    },
]
annotations_df = pd.DataFrame(annotation_rows)

# Attach the annotation rows to existing spans, matched on the
# context.span_id column of annotations_df.
response = client.spans.update_annotations(
    space_id="your-space-id",
    project_name="my-llm-app",
    dataframe=annotations_df,
)

# record_count: how many annotation rows were applied.
print(f"Updated {response.record_count} annotations")

Update Metadata

Attach custom metadata to spans for filtering and analysis.
import pandas as pd

# One column per metadata key, using the flattened "metadata.<key>"
# naming convention; one row per span being updated.
metadata_df = pd.DataFrame(
    {
        "context.span_id": ["span-1"],
        "metadata.customer_id": ["cust-456"],
        "metadata.experiment_version": ["v2"],
        "metadata.region": ["us-west"],
    }
)

# Attach the metadata columns to the matching spans (matched on the
# context.span_id column of metadata_df).
response = client.spans.update_metadata(
    space_id="your-space-id",
    project_name="my-llm-app",
    dataframe=metadata_df,
)

# record_count: how many spans had metadata updated.
print(f"Updated metadata for {response.record_count} spans")

Export Spans

Export spans for offline analysis, custom processing, or archival.
from datetime import datetime

# Time range for the export. datetime.fromisoformat parses an ISO-8601
# date string directly — equivalent to strptime with "%Y-%m-%d" (both
# yield midnight, naive), but clearer and less error-prone.
start_time = datetime.fromisoformat("2024-01-01")
end_time = datetime.fromisoformat("2026-01-01")

# Export to DataFrame
# Pulls the time-range-filtered spans into an in-memory DataFrame
# (boundary inclusivity of start_time/end_time not shown here — check
# the client's export documentation).
df = client.spans.export_to_df(
    space_id="your-space-id",
    project_name="my-llm-app",
    start_time=start_time,
    end_time=end_time,
)

# One row per exported span.
print(f"Exported {len(df)} spans")

Export to Parquet

# Write the same time-range export to a Parquet file at output_path.
# start_time / end_time are the datetimes defined in the export example
# earlier on this page.
client.spans.export_to_parquet(
    space_id="your-space-id",
    project_name="my-llm-app",
    start_time=start_time,
    end_time=end_time,
    output_path="./spans_export.parquet",
)
Export capabilities:
  • Time-range filtering
  • DataFrame or Parquet output
  • Efficient Arrow Flight transport for large exports
  • Progress bars for long-running exports