{"@type":"StructuredNewsArticle","access":{"license":"neupai_standard","structured_data":"free","full_text_access":null,"full_text_available":false,"attribution_required":true},"content":{"claims":[{"id":"c1","type":"fact","as_of":"2026-05","figures":null,"insight":null,"as_of_raw":"2026년 5월","statement":"메타 FAIR·스탠퍼드대·하버드대 공동 연구팀이 2026년 5월 프로그램벤치 벤치마크 논문을 공개했다","comparison":null,"expiry_hint":null,"source_type":"research_paper","as_of_explicit":true},{"id":"c2","type":"fact","as_of":"2026-05","figures":null,"insight":null,"as_of_raw":"2026년 5월","statement":"연구팀은 FFmpeg, SQLite, PHP 인터프리터 등 실제 현장에서 쓰이는 핵심 프로그램 200종을 과제로 선정했다","comparison":null,"expiry_hint":null,"source_type":"research_paper","as_of_explicit":false},{"id":"c3","type":"fact","as_of":"2026-05","figures":null,"insight":null,"as_of_raw":"2026년 5월","statement":"현존 최고 수준의 언어 모델 9종을 투입했다","comparison":null,"expiry_hint":null,"source_type":"research_paper","as_of_explicit":false},{"id":"c4","type":"fact","as_of":"2026-05","figures":null,"insight":null,"as_of_raw":"2026년 5월","statement":"연구팀은 총 24만 8853개의 동작 테스트로 AI들의 코드를 검증했다","comparison":null,"expiry_hint":null,"source_type":"research_paper","as_of_explicit":false},{"id":"c5","type":"fact","as_of":"2026-05","figures":null,"insight":null,"as_of_raw":"2026년 5월","statement":"단 한 개의 모델도 200개 과제 중 하나를 완전히 풀어내지 못했다","comparison":null,"expiry_hint":null,"source_type":"research_paper","as_of_explicit":false},{"id":"c6","type":"fact","as_of":"2026-05","figures":null,"insight":null,"as_of_raw":"2026년 5월","statement":"가장 성적이 좋은 모델조차 200개 중 6개 과제에서만 테스트의 95%를 통과하는 수준에 그쳤다","comparison":null,"expiry_hint":null,"source_type":"research_paper","as_of_explicit":false},{"id":"c7","type":"fact","as_of":"2024","figures":null,"insight":null,"as_of_raw":"2024년","statement":"리처드 서튼 앨버타대 교수는 2024년 ACM A.M. 튜링상 수상자다","comparison":null,"expiry_hint":null,"source_type":"company_disclosure","as_of_explicit":true},{"id":"c8","type":"fact","as_of":"2025-03","figures":null,"insight":null,"as_of_raw":"2025년 3월","statement":"서튼 교수와 앤드류 바르토 매사추세츠대 명예교수는 2025년 3월 수상자로 선정됐다","comparison":null,"expiry_hint":null,"source_type":"company_disclosure","as_of_explicit":true},{"id":"c9","type":"fact","as_of":"2019","figures":null,"insight":null,"as_of_raw":"2019년","statement":"서튼 교수는 2019년 '쓴 교훈' 에세이를 썼다","comparison":null,"expiry_hint":null,"source_type":"research_paper","as_of_explicit":true}],"topics":["AI","프로그래밍","연구","기술"],"summary":"메타·스탠퍼드·하버드 공동 연구팀이 실시한 프로그램벤치 테스트에서 세계 최고 AI 9종이 200개 프로그래밍 과제를 완전히 해결한 경우는 0%였다. 강화학습 창시자 리처드 서튼 교수는 LLM이 막다른 길이라며 세계와 상호작용하는 새로운 AI 패러다임이 필요하다고 주장했다.","entities":[{"name":"메타 FAIR","type":"organization","metadata":{"parent":null,"ticker":null},"canonical_id":"org:us:meta-fair","role_in_article":"primary_subject"},{"name":"스탠퍼드대","type":"organization","metadata":{"parent":null,"ticker":null},"canonical_id":"org:us:stanford-university","role_in_article":"primary_subject"},{"name":"하버드대","type":"organization","metadata":{"parent":null,"ticker":null},"canonical_id":"org:us:harvard-university","role_in_article":"primary_subject"},{"name":"클로드 오퍼스","type":"product","metadata":{"parent":null,"ticker":null},"canonical_id":"product:us:claude-opus","role_in_article":"mentioned"},{"name":"GPT","type":"product","metadata":{"parent":null,"ticker":null},"canonical_id":"product:us:gpt","role_in_article":"mentioned"},{"name":"제미나이 프로","type":"product","metadata":{"parent":null,"ticker":null},"canonical_id":"product:us:gemini-pro","role_in_article":"mentioned"},{"name":"리처드 서튼","type":"person","metadata":{"parent":null,"ticker":null},"canonical_id":"person:ca:richard-sutton","role_in_article":"quoted"},{"name":"앨버타대","type":"organization","metadata":{"parent":null,"ticker":null},"canonical_id":"org:ca:university-of-alberta","role_in_article":"mentioned"},{"name":"구글 딥마인드","type":"organization","metadata":{"parent":null,"ticker":null},"canonical_id":"org:us:google-deepmind","role_in_article":"mentioned"},{"name":"오픈AI","type":"company","metadata":{"parent":null,"ticker":null},"canonical_id":"corp:us:openai","role_in_article":"mentioned"},{"name":"일리야 수츠케버","type":"person","metadata":{"parent":null,"ticker":null},"canonical_id":"person:us:ilya-sutskever","role_in_article":"quoted"}],"headline":"세계 최고 AI 9종 시험 봤더니…200개 과제 완전 정복, 단 하나도 없었다","geography":["US","KR"],"ai_emotional_context":{"arousal":0,"valence":0,"primary_emotions":[],"emotional_triggers":[],"secondary_emotions":[]}},"@context":"https://neupai.io/schema/v0.2","identity":{"ai_url":null,"author":"정재엽 기자","language":"ko","publisher":{"name":"테크42","type":"online","domain":"www.tech42.co.kr"},"article_id":"tech42_20260511_ai-programming-benchmark-zero-success","updated_at":null,"originality":"self_produced","article_type":"analysis","published_at":"2026-05-11T23:09:14.000Z","canonical_url":"https://www.tech42.co.kr/%ec%84%b8%ea%b3%84-%ec%b5%9c%ea%b3%a0-ai-9%ec%a2%85-%ec%8b%9c%ed%97%98-%eb%b4%a4%eb%8d%94%eb%8b%88200%ea%b0%9c-%ea%b3%bc%ec%a0%9c-%ec%99%84%ec%a0%84-%ec%a0%95%eb%b3%b5-%eb%8b%a8-%ed%95%98/?utm_source=rss&utm_medium=rss&utm_campaign=%25ec%2584%25b8%25ea%25b3%2584-%25ec%25b5%259c%25ea%25b3%25a0-ai-9%25ec%25a2%2585-%25ec%258b%259c%25ed%2597%2598-%25eb%25b4%25a4%25eb%258d%2594%25eb%258b%2588200%25ea%25b0%259c-%25ea%25b3%25bc%25ec%25a0%259c-%25ec%2599%2584%25ec%25a0%2584-%25ec%25a0%2595%25eb%25b3%25b5-%25eb%258b%25a8-%25ed%2595%2598"},"temporal":{"freshness":"recent","next_update_expected":null},"provenance":{"source_chain":["primary_reporting"],"related_articles":[],"original_source_url":null}}