BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//Arize AI - ECPv6.15.20//NONSGML v1.0//EN
CALSCALE:GREGORIAN
METHOD:PUBLISH
X-ORIGINAL-URL:https://arize.com
X-WR-CALDESC:Events for Arize AI
REFRESH-INTERVAL;VALUE=DURATION:PT1H
X-Robots-Tag:noindex
X-PUBLISHED-TTL:PT1H
BEGIN:VTIMEZONE
TZID:America/Los_Angeles
BEGIN:DAYLIGHT
TZOFFSETFROM:-0800
TZOFFSETTO:-0700
TZNAME:PDT
DTSTART:20230312T020000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0700
TZOFFSETTO:-0800
TZNAME:PST
DTSTART:20231105T020000
END:STANDARD
BEGIN:DAYLIGHT
TZOFFSETFROM:-0800
TZOFFSETTO:-0700
TZNAME:PDT
DTSTART:20240310T020000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0700
TZOFFSETTO:-0800
TZNAME:PST
DTSTART:20241103T020000
END:STANDARD
BEGIN:DAYLIGHT
TZOFFSETFROM:-0800
TZOFFSETTO:-0700
TZNAME:PDT
DTSTART:20250309T020000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0700
TZOFFSETTO:-0800
TZNAME:PST
DTSTART:20251102T020000
END:STANDARD
END:VTIMEZONE
BEGIN:VEVENT
DTSTART;TZID=America/Los_Angeles:20240213T173000
DTEND;TZID=America/Los_Angeles:20240213T203000
DTSTAMP:20260421T062830Z
CREATED:20240129T233013Z
LAST-MODIFIED:20240129T233013Z
UID:10000478-1707845400-1707856200@arize.com
SUMMARY:Evaluating LLMs: Needle in a Haystack
DESCRIPTION:LLM evaluation is a discipline where confusion reigns and foundation model builders are effectively grading their own homework.\nBuilding on the viral threads on X/Twitter\, Greg Kamradt\, Robert Nishihara\, and Jason Lopatecki discuss highlights from Arize AI’s ongoing research on how major foundation models – from OpenAI’s GPT-4 to Mistral and Anthropic’s Claude – are stacking up against each other at important tasks and emerging LLM use cases\, covering and explaining the importance of results of Needle in a Haystack tests and other eval results on hallucination detection on private data\, question-and-answer\, code functionality\, and more.\nCurious which foundation models your company should be using for a specific use case – and which to avoid? You won’t want to miss this meetup!
URL:https://arize.com/community-events/evaluating-llms-needle-in-a-haystack/
LOCATION:San Francisco\, United States
ATTACH;FMTTYPE=image/jpeg:https://arize.com/wp-content/uploads/2024/01/Evaluating-LLMs-Needle-in-a-Haystack-resource-image.jpg
END:VEVENT
END:VCALENDAR