import { openai } from "@ai-sdk/openai";
import { createFaithfulnessEvaluator } from "@arizeai/phoenix-evals";
import { createOrGetDataset } from "@arizeai/phoenix-client/datasets";
import {
asExperimentEvaluator,
runExperiment,
} from "@arizeai/phoenix-client/experiments";
// Example rows for the dataset: each row pairs an input (a question plus the
// context it should be answered from) with an output answer.
const supportExamples = [
  {
    input: {
      question: "Is Phoenix open source?",
      context: "Phoenix is open source.",
    },
    output: {
      answer: "Phoenix is open source.",
    },
  },
];

// Register the examples under the "support-eval" dataset name. The call's
// return value is not used here; the dataset is referred to by name later.
await createOrGetDataset({ name: "support-eval", examples: supportExamples });
// Model handle passed to the evaluator: OpenAI "gpt-4o-mini" via the AI SDK provider.
const judgeModel = openai("gpt-4o-mini");

// Faithfulness evaluator built on top of the model above.
const faithfulness = createFaithfulnessEvaluator({ model: judgeModel });
// Run an experiment over the "support-eval" dataset: the task produces an
// output for each example, and the faithfulness evaluator scores that output.
await runExperiment({
  dataset: { datasetName: "support-eval" },
  // The task is invoked with the whole dataset example, so the question and
  // context must be read from its `input` field; destructuring them directly
  // from the argument yields `undefined` for both.
  // NOTE(review): this returns the assembled prompt text rather than a
  // model-generated answer, so that text is what the evaluator scores.
  task: async ({ input }) => {
    const question = String(input.question ?? "");
    const context = String(input.context ?? "");
    return `${question} Answer using only this context: ${context}`;
  },
  evaluators: [
    asExperimentEvaluator({
      name: "faithfulness",
      kind: "LLM",
      // Forward the example's question/context and the task output to the
      // faithfulness evaluator, coercing each to a string (missing values
      // become the empty string).
      evaluate: async ({ input, output }) =>
        faithfulness.evaluate({
          input: String(input.question ?? ""),
          context: String(input.context ?? ""),
          output: String(output ?? ""),
        }),
    }),
  ],
});