from arize.experiments import (
ExperimentTaskResultFieldNames,
EvaluationResultFieldNames,
)
# Pre-computed experiment runs, one dict per dataset example.
# Every run carries the same five keys:
#   example_id         - identifier of the example the run belongs to
#   output             - the text produced for that example
#   latency_ms         - recorded latency; not referenced by the field
#                        mappings visible in this snippet
#   correctness_score  - numeric evaluation result
#   correctness_label  - categorical evaluation result
experiment_runs = [
    {
        "example_id": "ex-1",
        "output": "Paris is the capital of France",
        "latency_ms": 245,
        "correctness_score": 1.0,
        "correctness_label": "correct",
    },
    {
        "example_id": "ex-2",
        "output": "William Shakespeare wrote Romeo and Juliet",
        "latency_ms": 198,
        "correctness_score": 1.0,
        "correctness_label": "correct",
    },
]
# Name the run-dict keys that hold the task result: the example identifier is
# stored under "example_id" and the produced text under "output".
task_fields = ExperimentTaskResultFieldNames(
    example_id="example_id",
    output="output",
)
# One entry per evaluator, keyed by the evaluator's display name. The value
# names the run-dict keys that carry that evaluator's score and label.
evaluator_columns = {
    "Correctness": EvaluationResultFieldNames(
        score="correctness_score",
        label="correctness_label",
    ),
}
# Create the experiment from the pre-computed runs and the two field mappings.
# NOTE(review): `client` is not defined in the lines shown here; it must be
# created before this call. "dataset-id" looks like a placeholder for a real
# dataset identifier - confirm before running.
experiment = client.experiments.create(
    name="pre-computed-experiment",
    dataset_id="dataset-id",
    experiment_runs=experiment_runs,
    task_fields=task_fields,
    evaluator_columns=evaluator_columns,
)