import pandas as pd
from arize.embeddings import EmbeddingGenerator, UseCases
# Print the pretrained models reported by EmbeddingGenerator.
pretrained_models = EmbeddingGenerator.list_pretrained_models()
print(pretrained_models)

# Build a small example frame with a single free-text column.
sample_texts = [
    "The product quality is excellent.",
    "Shipping was delayed by 3 days.",
    "Customer service was very helpful.",
]
df = pd.DataFrame({"text": sample_texts})

# Configure a generator for the NLP sequence-classification use case.
generator = EmbeddingGenerator.from_use_case(
    use_case=UseCases.NLP.SEQUENCE_CLASSIFICATION,
    model_name="distilbert-base-uncased",
    tokenizer_max_length=512,
    batch_size=100,
)

# Embed the "text" column and keep the result next to the source text.
text_vectors = generator.generate_embeddings(text_col=df["text"])
df["text_vector"] = text_vectors