{"schema_version":"v0.1","generated_at":"2026-06-22 02:03 UTC","methodology_version":"v3.2","license":"CC BY 4.0 — https://creativecommons.org/licenses/by/4.0/","name":"AgentBench","repo":"THUDM/AgentBench","display_repo":"","url":"https://github.com/THUDM/AgentBench","rank":297,"previous_rank":297,"rank_delta":0,"stars":3506,"stars_fmt":"3.5k","forks":263,"forks_fmt":"263","last_push":"2026-02-08","days_ago":134,"weekly_commits":0,"commits_low_confidence":false,"score":39.1,"description":"A Comprehensive Benchmark to Evaluate LLMs as Agents (ICLR'24)","language":"Python","open_issues":74,"category":"Observability & Evaluation","category_rank":20,"npm_package":"","crate_package":"","pypi_package":"","weekly_downloads":null,"dl_source":"","hn_mentions_30d":null,"has_provenance":false,"npm_provenance":null,"pypi_provenance":null,"signed_commits_ratio":0.507,"scorecard_score":null,"scorecard_checks":{},"scorecard_scanned_at":null,"slug":"agentbench","source_note":"","public_actions":null,"mcp_server_support":{"status":"none","confidence":null,"evidence":[]},"external_service_dependencies":{"providers":["Anthropic","OpenAI","Redis"],"requires_api_keys":false,"confidence":"high","evidence":["README/docs mention OpenAI","Found Anthropic dependency 'anthropic' in requirements.txt","README/docs mention Redis"]},"tool_plugin_surface":{"plugin_system":"extension-based","tool_tags":["browser","code","database","shell"],"confidence":"high","evidence":["README/docs mention a extension-based plugin/integration surface","Found browser dependency 'selenium' in src/server/tasks/webshop/requirements.txt","README/docs mention shell capabilities","README/docs mention database capabilities","README/docs mention code capabilities"]},"package_provenance_drift":{"status":"not_applicable","confidence":null,"summary":"No package source configured","evidence":[]},"evidence_grade":"D","listing_status":"listed","display_listing_status":"listed","display_status_label":"Listed","has_warning":false,"warning_reasons":[],"license_spdx":"Apache-2.0","license_type":"open","license_override":"","trust_score":16.7,"trust_score_v2":13.9,"rank_v2":297,"rank_v2_delta":0,"trust_v2_adjustment":-2.8,"trust_confidence":0.5,"trust_breakdown":{"safety":2.5,"identity":10.8,"transparency":8.5,"maintenance":3.1,"adoption":8.5},"trust_v2_breakdown":{"mcp":0.0,"external_dependencies":-1.0,"tool_plugin_surface":-1.8,"package_provenance_drift":0.0},"pending_signals":false,"trust_credential":{"spec":"https://hvtracker.net/spec/trust-credential/v0.2","version":"0.2","issuer":"hvtracker.net","subject":{"repo":"THUDM/AgentBench","slug":"agentbench","agent_url":"https://hvtracker.net/agents/agentbench"},"methodology_version":"v3.2","issued_at":"2026-06-22 02:03 UTC","expires_at":"2026-06-29T02:03:26Z","trust_score":16.7,"confidence":0.5,"evidence_grade":"D","dimensions":{"safety":2.5,"identity":10.8,"transparency":8.5,"maintenance":3.1,"adoption":8.5},"listing_status":"listed","evidence_hash":"f09be37afc337b2e3b5d0dc82ff496e5bd3d8ed729763bbfc200221fc1ae4865","signature":"o0gcwbQjGB/1k9PU5Ad8Fi0xtF/hGeNU430DQbxg3Ub9+5NtEIBryeJyOhmc+STNK/AuHH6r4TmYe5KFqITdAw=="},"history":[{"date":"2026-05-23","rank":51,"score":43.0,"trust_score":null,"evidence_grade":null,"stars":3446},{"date":"2026-05-24","rank":83,"score":42.8,"trust_score":null,"evidence_grade":null,"stars":3449},{"date":"2026-06-21","rank":297,"score":39.1,"trust_score":16.7,"evidence_grade":"D","stars":3506},{"date":"2026-06-22","rank":297,"score":39.1,"trust_score":16.7,"evidence_grade":"D","stars":3506}],"events":[{"date":"2026-05-24","type":"rank_changed","reason_code":"rank_down","label":"Rank Moved","short_label":"Rank","tone":"neutral","detail":"Rank dropped 32 spots (#51 → #83)"},{"date":"2026-05-25","type":"delisted","reason_code":"delisted","label":"Removed From Active Tracking","short_label":"Removed","tone":"negative","detail":"Removed from active tracking"},{"date":"2026-06-21","type":"listed","reason_code":"listed","label":"Newly Listed","short_label":"New","tone":"positive","detail":"First tracked at rank #297"}],"recent_changes":[{"date":"2026-05-24","type":"rank_changed","reason_code":"rank_down","label":"Rank Moved","short_label":"Rank","tone":"neutral","detail":"Rank dropped 32 spots (#51 → #83)"},{"date":"2026-05-25","type":"delisted","reason_code":"delisted","label":"Removed From Active Tracking","short_label":"Removed","tone":"negative","detail":"Removed from active tracking"},{"date":"2026-06-21","type":"listed","reason_code":"listed","label":"Newly Listed","short_label":"New","tone":"positive","detail":"First tracked at rank #297"}]}