import os

from arize.pandas.logger import Client, Schema
from arize.utils.types import ModelTypes, Environments, EmbeddingColumnNames, TypedColumns
# Credentials for the Arize client. Each value is taken from the environment
# when the corresponding variable is set, so real secrets do not have to be
# written into this file; otherwise the original placeholder string is kept
# and must be replaced with a real value before running.
API_KEY = os.environ.get("ARIZE_API_KEY", 'ARIZE_API_KEY')
SPACE_ID = os.environ.get("ARIZE_SPACE_ID", 'YOUR SPACE ID')
# Build the client used below to log the dataframe, using the credentials
# declared above.
arize_client = Client(
    api_key=API_KEY,
    space_id=SPACE_ID,
)
# Names of the dataframe columns that hold model features; passed to the
# Schema below as `feature_column_names`.
feature_column_names = [
    "MERCHANT_TYPE",
    "ENTRY_MODE",
    "STATE",
    "MEAN_AMOUNT",
    "STD_AMOUNT",
    "TX_AMOUNT",
]
# Feature and tag columns can optionally be declared with explicit typing
# through TypedColumns (imported from arize.utils.types at the top of the file).
# Here "name" is listed under `inferred`, while "zip_code" and "age" are
# listed under `to_int`.
tag_columns = TypedColumns(
    inferred=["name"],
    to_int=["zip_code", "age"],
)
# Embedding features, keyed by the name under which each embedding feature
# will appear in the app.
embedding_feature_column_names = {
    "embedding_display_name": EmbeddingColumnNames(
        # Required: column holding the embedding vectors.
        vector_column_name="text_vector",
        # Optional: column holding the raw data the vectors represent.
        data_column_name="text",
    ),
}
# Define the Schema that maps dataframe columns to their roles, including
# the embedding information declared above.
schema = Schema(
    # Row identity and timing
    prediction_id_column_name="prediction_id",
    timestamp_column_name="prediction_ts",
    # Predictions
    prediction_label_column_name="PREDICTION",
    prediction_score_column_name="PREDICTION_SCORE",
    # Actuals
    actual_label_column_name="ACTUAL",
    actual_score_column_name="ACTUAL_SCORE",
    # Features, embeddings and tags
    feature_column_names=feature_column_names,
    embedding_feature_column_names=embedding_feature_column_names,
    tag_column_names=tag_columns,
)
# Send the dataframe to Arize using the schema mapping defined above and keep
# the call's return value in `response`.
# NOTE(review): `test_dataframe` is not defined in the visible part of this
# script; it must be defined before this call runs.
response = arize_client.log(
    dataframe=test_dataframe,
    schema=schema,
    model_id="sample-model-1",
    model_version="v1",
    model_type=ModelTypes.SCORE_CATEGORICAL,
    environment=Environments.PRODUCTION,
)