{
 "_meta": {
  "title": "benchr verified model figures",
  "purpose": "SINGLE SOURCE OF TRUTH for every model number on benchr. Charts, slides, and article pages must read figures from this file, not from prose. Each figure was confirmed against the provider's OWN official source on the verifiedDate shown.",
  "rules": [
   "Official provider source only (the company's own announcement, docs, pricing page, or model card). Third-party blogs/aggregators/leaderboards never count as a source.",
   "null means the figure could NOT be confirmed from an official source. null is never a guess — it is an honest gap. Read the matching note.",
   "Do not copy numbers from benchr article pages; they may contain errors. This file is the independent, verified record.",
   "Prices are USD per 1,000,000 tokens unless a field name says otherwise (e.g. perImage)."
  ],
  "verifiedDate": "2026-05-31",
  "reVerifiedDate": "2026-06-12",
  "reVerificationNote": "2026-06-12 re-verification pass: every tracked commercial price re-read live against the provider's own pricing page — NO price changes found (Anthropic, OpenAI, Google, xAI, DeepSeek, Moonshot, Mistral all hold their June-10 levels). New official data filled in: Anthropic's pricing page now publishes per-model cache-hit ('Cache Hits & Refreshes') prices, so the previously-null cachedInputPerM fields are set for Opus 4.8/4.7 ($0.50), Sonnet 4.6 ($0.30), Haiku 4.5 ($0.10), and Mythos 5 ($1). The official deprecations table lists Claude Opus 4.6 as ACTIVE (not legacy) with tentative retirement floor Feb 5, 2027; tentativeRetirementFloor fields added for all active Anthropic models from the same table. Context windows re-confirmed via the pricing page's 1M long-context list (Fable 5, Mythos 5, Opus 4.8/4.7/4.6, Sonnet 4.6) and via model doc pages for GPT-5, GPT-5 Mini, Grok 4.3, DeepSeek V4 (both), Kimi K2.6, Mistral Large 3. Gemini 3.5 Pro is still NOT released (Google said 'next month' at I/O May 19; nothing official as of June 12) — do not add it. 2026-06-10 re-verification pass: added Claude Fable 5 + Claude Mythos 5 (released 2026-06-09; $10/$50, 1M context, 128K max output, confirmed on platform.claude.com and anthropic.com/news/claude-fable-5-mythos-5). Corrected the June 5 audit's 'phantom model' calls: GPT-5.4 (released 2026-03-05, $2.50/$15), Claude Opus 4.6 (released 2026-02-05, $5/$25, in the official legacy models table), Grok 4.20 (API 2026-03-10, $2/$6, 2M context, docs.x.ai), and Qwen3.5 (flagship 397B-A17B, 2026-02-16) are ALL REAL and now have entries. Also confirmed from the official Anthropic docs: Claude Sonnet 4 and Claude Opus 4 retire June 15, 2026; Claude Opus 4.1 retires August 5, 2026. GPT-5.3 Instant (ChatGPT default March 3 - May 5, 2026) was replaced by GPT-5.5 Instant; paid users keep it roughly three months from May 5.\n2026-06-03 re-verification (benchr tool-suite fix pass): Claude Opus 4.8/4.7 and Sonnet 4.6 context windows re-confirmed as 1,000,000 tokens on platform.claude.com (200K only on Microsoft Foundry); Opus 4.8/4.7 max output 128K, Sonnet 4.6 max output 64K. DeepSeek V4-Pro pricing re-read DIRECTLY off api-docs.deepseek.com on 2026-06-03 = $0.435 in / $0.87 out (cache-hit $0.003625) — the post-promo level held; the $1.74/$3.48 figure circulating in third-party snippets is a stale pre-promo cache and is NOT what the live page shows. Mistral Medium 3.5 added (Modified MIT, open-weight, 256K, $1.50/$7.50). Phi-4 added (MIT, 14B, 16K, self-host, Dec 2024).",
  "sourceLegend": "Each model has a `sources` map. A figure's source = the URL in `sources` for that figure's category (pricing, benchmarks, context, release, license). All figures share the model-level `verifiedDate` unless a per-figure note says otherwise.",
  "currency": "USD",
  "note": "This file is distinct from assets/data/models.json, which powers the benchr tools with editorial 0-100 capability scores and latency estimates that are NOT official figures. Verified official numbers live here."
 },
 "models": [
  {
   "id": "claude-opus-4-8",
   "name": "Claude Opus 4.8",
   "provider": "Anthropic",
   "apiModelId": "claude-opus-4-8",
   "license": "proprietary",
   "releaseDate": "2026-05-28",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 128000,
    "maxOutputTokensBeta": 300000
   },
   "pricing": {
    "inputPerM": 5.0,
    "outputPerM": 25.0,
    "fastModeInputPerM": 10.0,
    "fastModeOutputPerM": 50.0,
    "batchInputPerM": 2.5,
    "batchOutputPerM": 12.5,
    "cachedInputPerM": 0.5
   },
   "tentativeRetirementFloor": "2027-05-28",
   "benchmarks": {
    "SWE-bench Verified": 88.6,
    "SWE-bench Pro": 69.2,
    "SWE-bench Multilingual": 84.4,
    "SWE-bench Multimodal": 38.4,
    "Terminal-Bench 2.1": 74.6,
    "GPQA Diamond": 93.6,
    "OSWorld-Verified": 83.4,
    "BrowseComp (single-agent)": 84.3,
    "Humanity's Last Exam (no tools)": 49.8,
    "GDPval-AA (Elo)": 1890,
    "ARC-AGI-2": null
   },
   "sources": {
    "release": "https://www.anthropic.com/news/claude-opus-4-8",
    "pricing": "https://platform.claude.com/docs/en/about-claude/pricing",
    "context": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "benchmarks": "https://www.anthropic.com/claude-opus-4-8-system-card"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Standard price unchanged from Opus 4.7. The $10/$50 figure is the optional fast-mode rate (~2.5x output speed), NOT the base price; base is $5/$25. Cache-hit input $0.50 (0.1x base) now explicit on the official pricing page (confirmed June 12, 2026). Official tentative retirement floor: not sooner than May 28, 2027. ARC-AGI-2 is not in the official Opus 4.8 headline summary table, so it is null (do not state one). New Opus 4.7+ tokenizer can use up to ~35% more tokens per the pricing page."
  },
  {
   "id": "claude-opus-4-7",
   "name": "Claude Opus 4.7",
   "provider": "Anthropic",
   "apiModelId": "claude-opus-4-7",
   "license": "proprietary",
   "releaseDate": "2026-04-16",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 128000,
    "maxOutputTokensBeta": 300000
   },
   "pricing": {
    "inputPerM": 5.0,
    "outputPerM": 25.0,
    "fastModeInputPerM": 30.0,
    "fastModeOutputPerM": 150.0,
    "batchInputPerM": 2.5,
    "batchOutputPerM": 12.5,
    "cachedInputPerM": 0.5
   },
   "tentativeRetirementFloor": "2027-04-16",
   "benchmarks": {
    "SWE-bench Verified": 87.6,
    "SWE-bench Pro": 64.3,
    "SWE-bench Multilingual": 80.5,
    "SWE-bench Multimodal": 34.5,
    "Terminal-Bench 2.0": 69.4,
    "GPQA Diamond": 94.2
   },
   "sources": {
    "release": "https://www.anthropic.com/news/claude-opus-4-7",
    "pricing": "https://platform.claude.com/docs/en/about-claude/pricing",
    "context": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "benchmarks": "https://www.anthropic.com/claude-opus-4-7-system-card"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Fast mode on 4.7 is $30/$150 (3x the price of Opus 4.8's $10/$50 fast mode). Cache-hit input $0.50 confirmed on the official pricing page June 12, 2026; official tentative retirement floor: not sooner than April 16, 2027. Opus 4.8 later restated Opus 4.7's OSWorld score upward (~82.x) after a test-harness fix."
  },
  {
   "id": "claude-sonnet-4-6",
   "name": "Claude Sonnet 4.6",
   "provider": "Anthropic",
   "apiModelId": "claude-sonnet-4-6",
   "license": "proprietary",
   "releaseDate": "2026-02-17",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 64000,
    "maxOutputTokensBeta": 300000
   },
   "pricing": {
    "inputPerM": 3.0,
    "outputPerM": 15.0,
    "batchInputPerM": 1.5,
    "batchOutputPerM": 7.5,
    "cachedInputPerM": 0.3
   },
   "tentativeRetirementFloor": "2027-02-17",
   "benchmarks": {
    "SWE-bench Verified": 79.6,
    "SWE-bench Multilingual": 75.9,
    "Terminal-Bench 2.0": 59.1,
    "OSWorld-Verified": 72.5,
    "GPQA Diamond": 89.9,
    "MMMLU": 89.3,
    "AIME 2025 (no tools)": 95.6,
    "Humanity's Last Exam (no tools)": 33.2,
    "Humanity's Last Exam (with tools)": 49.0,
    "ARC-AGI-2": 58.3,
    "tau2-bench Telecom": 97.9,
    "tau2-bench Retail": 91.7,
    "GDPval-AA (Elo)": 1633
   },
   "sources": {
    "release": "https://www.anthropic.com/news/claude-sonnet-4-6",
    "pricing": "https://platform.claude.com/docs/en/about-claude/pricing",
    "context": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "benchmarks": "https://www.anthropic.com/news/claude-sonnet-4-6"
   },
   "verifiedDate": "2026-06-12",
   "notes": "No fast-mode tier (fast mode is Opus-only). Cache-hit input $0.30 confirmed on the official pricing page June 12, 2026; official tentative retirement floor: not sooner than February 17, 2027. Benchmark values read from the official Sonnet 4.6 System Card (Table 2.1.A). SWE-bench Verified 79.6% averaged over 25 trials (80.2% with a stated prompt modification). Anthropic flags possible AIME 2025 contamination. Anthropic reports MMMLU, not plain MMLU."
  },
  {
   "id": "claude-haiku-4-5",
   "name": "Claude Haiku 4.5",
   "provider": "Anthropic",
   "apiModelId": "claude-haiku-4-5",
   "license": "proprietary",
   "releaseDate": "2025-10-15",
   "context": {
    "windowTokens": 200000,
    "maxOutputTokens": 64000,
    "maxOutputTokensBeta": null
   },
   "pricing": {
    "inputPerM": 1.0,
    "outputPerM": 5.0,
    "batchInputPerM": 0.5,
    "batchOutputPerM": 2.5,
    "cachedInputPerM": 0.1
   },
   "tentativeRetirementFloor": "2026-10-15",
   "benchmarks": {
    "SWE-bench Verified": 73.3,
    "Terminal-Bench (no thinking)": 40.21,
    "Terminal-Bench (32K thinking)": 41.75,
    "GPQA Diamond": null,
    "OSWorld-Verified": null,
    "AIME 2025": null,
    "MMMLU": null
   },
   "sources": {
    "release": "https://www.anthropic.com/news/claude-haiku-4-5",
    "pricing": "https://platform.claude.com/docs/en/about-claude/pricing",
    "context": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "benchmarks": "https://www.anthropic.com/news/claude-haiku-4-5"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Pinned snapshot claude-haiku-4-5-20251001. Pricing (incl. cache-hit $0.10) and tentative retirement floor (not sooner than October 15, 2026) re-confirmed on official pages June 12, 2026; the 200K context window stands from the May 31 verification. SWE-bench Verified 73.3% (avg of 50 trials, 128K thinking budget) and Terminal-Bench are the only headline numbers Anthropic publishes as readable official text; GPQA/OSWorld/AIME/MMMLU appear only inside a launch-page image, so they are null (not guessed)."
  },
  {
   "id": "claude-mythos-preview",
   "name": "Claude Mythos Preview",
   "provider": "Anthropic",
   "apiModelId": null,
   "license": "proprietary",
   "releaseDate": null,
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": null,
    "maxOutputTokensBeta": null
   },
   "pricing": {
    "inputPerM": 25.0,
    "outputPerM": 125.0,
    "note": "Stated price for approved Project Glasswing participants only; not generally purchasable."
   },
   "benchmarks": {
    "SWE-bench Pro": 77.8
   },
   "availability": "RESTRICTED — research preview under Project Glasswing. Not generally available. Invitation-only (12 named launch partners + 40+ critical-infrastructure orgs). No self-serve sign-up; no public API model id.",
   "sources": {
    "release": "https://www.anthropic.com/glasswing",
    "pricing": "https://www.anthropic.com/glasswing",
    "context": "https://platform.claude.com/docs/en/about-claude/pricing",
    "benchmarks": "https://www.anthropic.com/glasswing"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Anthropic: 'We do not plan to make Claude Mythos Preview generally available.' Purpose-built for defensive cybersecurity / vulnerability research. No GA date. 1M context confirmed only via the pricing page's long-context list. Frame any benchr page as restricted/not-for-public."
  },
  {
   "id": "gpt-5",
   "name": "GPT-5",
   "provider": "OpenAI",
   "apiModelId": "gpt-5",
   "license": "proprietary",
   "releaseDate": "2025-08-07",
   "context": {
    "windowTokens": 400000,
    "maxOutputTokens": 128000
   },
   "pricing": {
    "inputPerM": 1.25,
    "outputPerM": 10.0,
    "cachedInputPerM": 0.125
   },
   "benchmarks": {
    "SWE-bench Verified": 74.9,
    "AIME 2025": null,
    "GPQA Diamond": null,
    "HealthBench Hard": 46.2
   },
   "sources": {
    "release": "https://deploymentsafety.openai.com/gpt-5",
    "pricing": "https://developers.openai.com/api/docs/models/gpt-5",
    "context": "https://developers.openai.com/api/docs/models/gpt-5",
    "benchmarks": "https://cdn.openai.com/gpt-5-system-card.pdf"
   },
   "verifiedDate": "2026-06-12",
   "notes": "SWE-bench Verified 74.9% is OpenAI's official launch-blog figure (verbosity=medium), confirmed via the system card PDF which cites it (p.36). AIME 2025 (~94.6%) and GPQA Diamond (~88.4%) are widely attributed to the launch page openai.com/index/introducing-gpt-5, which blocks automated fetchers (403); they could NOT be re-read from an accessible official source, so they are null. The 88.4% figure may refer to GPT-5 pro, not base GPT-5. OpenAI docs now label GPT-5 as a previous model."
  },
  {
   "id": "gpt-5-5",
   "name": "GPT-5.5",
   "provider": "OpenAI",
   "apiModelId": "gpt-5.5",
   "license": "proprietary",
   "releaseDate": "2026-04-23",
   "context": {
    "windowTokens": 1050000,
    "maxOutputTokens": 128000
   },
   "pricing": {
    "inputPerM": 5.0,
    "outputPerM": 30.0,
    "cachedInputPerM": 0.5,
    "proInputPerM": null,
    "proOutputPerM": null,
    "extendedContextSurcharge": "For sessions >272K input tokens: 2x input, 1.5x output (standard/batch/flex)."
   },
   "benchmarks": {
    "HealthBench (length-adjusted)": 56.5,
    "HealthBench Professional": 51.8,
    "SWE-bench Verified": null,
    "SWE-bench Pro": null,
    "Terminal-Bench 2.0": null,
    "OSWorld-Verified": null
   },
   "sources": {
    "release": "https://deploymentsafety.openai.com/gpt-5-5",
    "pricing": "https://developers.openai.com/api/docs/models/gpt-5.5",
    "context": "https://developers.openai.com/api/docs/models/gpt-5.5",
    "benchmarks": "https://deploymentsafety.openai.com/gpt-5-5"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Flagship GPT-5.5 (and GPT-5.5 Pro) announced Apr 23, 2026 (do not confuse with GPT-5.5 Instant, the ChatGPT default released May 5, 2026). Context is 1,050,000 (not a round 1M). HealthBench figures are the only official system-card benchmarks. GPT-5.5-pro pricing (~$30/$180) and the coding/agent numbers circulating (Terminal-Bench 82.7%, OSWorld 78.7%, SWE-bench Pro 58.6%) are attributed to the launch page, which blocks automated fetchers (403), and could NOT be read from an accessible official source — null until confirmable. The extended-context >272K surcharge IS officially confirmed on the model doc page."
  },
  {
   "id": "gpt-5-mini",
   "name": "GPT-5 Mini",
   "provider": "OpenAI",
   "apiModelId": "gpt-5-mini",
   "license": "proprietary",
   "releaseDate": "2025-08-07",
   "context": {
    "windowTokens": 400000,
    "maxOutputTokens": 128000
   },
   "pricing": {
    "inputPerM": 0.25,
    "outputPerM": 2.0,
    "cachedInputPerM": 0.025
   },
   "benchmarks": {},
   "sources": {
    "release": "https://developers.openai.com/api/docs/models/gpt-5-mini",
    "pricing": "https://developers.openai.com/api/docs/models/gpt-5-mini",
    "context": "https://developers.openai.com/api/docs/models/gpt-5-mini"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Official price is $0.25/$2.00 (NOT $0.50/$4.00 as in the legacy benchr verified-table). Release date inferred from snapshot id gpt-5-mini-2025-08-07. No headline benchmark published on the model doc page (empty benchmarks object, not null figures)."
  },
  {
   "id": "gpt-image-2",
   "name": "ChatGPT Images 2.0 (GPT Image 2)",
   "provider": "OpenAI",
   "apiModelId": "gpt-image-2",
   "license": "proprietary",
   "releaseDate": "2026-04-21",
   "context": {
    "windowTokens": null,
    "maxOutputTokens": null
   },
   "pricing": {
    "imageInputPerM": 8.0,
    "cachedImageInputPerM": 2.0,
    "imageOutputPerM": 30.0,
    "textInputPerM": 5.0,
    "batchImageInputPerM": 4.0,
    "batchImageOutputPerM": 15.0,
    "perImage": null
   },
   "benchmarks": {},
   "sources": {
    "release": "https://developers.openai.com/api/docs/models/gpt-image-2",
    "pricing": "https://developers.openai.com/api/docs/pricing"
   },
   "verifiedDate": "2026-05-31",
   "notes": "OpenAI prices this model by TOKENS, not per image. There is NO official per-image list price; per-image figures like ~$0.006 (low) / ~$0.053 (medium) / ~$0.211 (high) for 1024x1024 are third-party calculator estimates, so perImage is null. Release date inferred from snapshot gpt-image-2-2026-04-21. Supersedes GPT Image 1."
  },
  {
   "id": "gemini-3-pro",
   "name": "Gemini 3 Pro",
   "provider": "Google",
   "apiModelId": "gemini-3-pro-preview",
   "license": "proprietary",
   "releaseDate": "2025-11-18",
   "status": "DEPRECATED — shut down 2026-03-09; replaced by Gemini 3.1 Pro.",
   "context": {
    "windowTokens": 1048576,
    "maxOutputTokens": 65536
   },
   "pricing": {
    "inputPerM": null,
    "outputPerM": null,
    "note": "No longer on the live official pricing page (model retired). When live it was $2/$12 (<=200K) and $4/$18 (>200K)."
   },
   "benchmarks": {
    "SWE-bench Verified": 76.2,
    "GPQA Diamond": 91.9,
    "Humanity's Last Exam (no tools)": 37.5,
    "Terminal-Bench 2.0": 54.2,
    "LMArena (Elo)": 1501,
    "SimpleQA Verified": 72.1,
    "MMMU-Pro": 81.0,
    "MathArena Apex": 23.4
   },
   "sources": {
    "release": "https://blog.google/products-and-platforms/products/gemini/gemini-3/",
    "status": "https://ai.google.dev/gemini-api/docs/deprecations",
    "context": "https://ai.google.dev/gemini-api/docs/models/gemini-3-pro-preview",
    "benchmarks": "https://blog.google/products-and-platforms/products/gemini/gemini-3/"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Live price is null because the retired model is no longer on the official pricing page (historical $2/$12 kept only as a note). Benchmarks from the Gemini 3 launch blog. Max output 65,536 on the model spec page (an older guide page said 64,000)."
  },
  {
   "id": "gemini-3-1-pro",
   "name": "Gemini 3.1 Pro",
   "provider": "Google",
   "apiModelId": "gemini-3.1-pro-preview",
   "license": "proprietary",
   "releaseDate": "2026-02-19",
   "status": "Preview (GA coming soon).",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 64000
   },
   "pricing": {
    "inputPerM": 2.0,
    "outputPerM": 12.0,
    "inputAbove200kPerM": 4.0,
    "outputAbove200kPerM": 18.0,
    "batchInputPerM": 1.0,
    "batchOutputPerM": 6.0,
    "batchInputAbove200kPerM": 2.0,
    "batchOutputAbove200kPerM": 9.0,
    "freeApiTier": false
   },
   "benchmarks": {
    "ARC-AGI-2": 77.1,
    "GPQA Diamond": 94.3,
    "Humanity's Last Exam (with tools)": 51.4,
    "MMMU-Pro": 80.5,
    "SWE-bench Verified": 80.6,
    "MMMLU": 92.6
   },
   "sources": {
    "release": "https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-1-pro/",
    "pricing": "https://ai.google.dev/gemini-api/docs/pricing",
    "context": "https://deepmind.google/models/model-cards/gemini-3-1-pro/",
    "benchmarks": "https://deepmind.google/models/model-cards/gemini-3-1-pro/"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Tiered pricing: the >200K-token tier doubles input ($4) and raises output ($18). Output price includes thinking tokens. No free API tier (free trial in AI Studio UI only)."
  },
  {
   "id": "gemini-3-5-flash",
   "name": "Gemini 3.5 Flash",
   "provider": "Google",
   "apiModelId": "gemini-3.5-flash",
   "license": "proprietary",
   "releaseDate": "2026-05-19",
   "status": "GA (stable).",
   "context": {
    "windowTokens": 1048576,
    "maxOutputTokens": 65536
   },
   "pricing": {
    "inputPerM": 1.5,
    "outputPerM": 9.0,
    "batchInputPerM": 0.75,
    "batchOutputPerM": 4.5,
    "freeApiTier": true,
    "note": "Flat rate — no >200K context tier."
   },
   "benchmarks": {
    "Terminal-Bench 2.1": 76.2,
    "GDPval-AA (Elo)": 1656,
    "MCP Atlas": 83.6,
    "CharXiv Reasoning": 84.2,
    "SWE-bench Verified": null,
    "GPQA Diamond": null
   },
   "sources": {
    "release": "https://ai.google.dev/gemini-api/docs/deprecations",
    "pricing": "https://ai.google.dev/gemini-api/docs/pricing",
    "context": "https://ai.google.dev/gemini-api/docs/models/gemini-3.5-flash",
    "benchmarks": "https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-5/"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Released at Google I/O (May 19, 2026), GA. Flat $1.50/$9 (not context-tiered). Free API tier available. The launch blog publishes only the four agentic/coding/multimodal benchmarks above; no official SWE-bench/GPQA for 3.5 Flash, so those are null."
  },
  {
   "id": "grok-4-3",
   "name": "Grok 4.3",
   "provider": "xAI",
   "apiModelId": "grok-4.3",
   "license": "proprietary",
   "releaseDate": null,
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": null
   },
   "pricing": {
    "inputPerM": 1.25,
    "outputPerM": 2.5,
    "cachedInputPerM": 0.2
   },
   "benchmarks": {},
   "sources": {
    "pricing": "https://docs.x.ai/developers/models/grok-4.3",
    "context": "https://docs.x.ai/developers/models/grok-4.3"
   },
   "verifiedDate": "2026-06-12",
   "notes": "xAI officially publishes price ($1.25/$2.50, cached $0.20), 1M context, and the model id only. It publishes NO official release date, max output, or numeric benchmarks for Grok 4.3 (only a qualitative non-hallucination claim) — those are null. Figures like 'Intelligence Index 53' or 'tau2-bench Telecom 98%' are third-party (Artificial Analysis / leaderboards), not official xAI numbers."
  },
  {
   "id": "deepseek-v4-flash",
   "name": "DeepSeek-V4-Flash",
   "provider": "DeepSeek",
   "apiModelId": "deepseek-v4-flash",
   "license": "MIT",
   "releaseDate": "2026-04-24",
   "params": {
    "total": "284B",
    "active": "13B"
   },
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 384000
   },
   "pricing": {
    "inputPerM": 0.14,
    "outputPerM": 0.28,
    "cacheHitInputPerM": 0.0028,
    "selfHost": true
   },
   "benchmarks": {
    "SWE-bench Verified": 79.0,
    "GPQA Diamond": 88.1,
    "LiveCodeBench": 91.6,
    "MMLU-Pro (Think Max)": 86.2,
    "HMMT 2026 Feb": 94.8,
    "MRCR 1M": 78.7
   },
   "sources": {
    "release": "https://api-docs.deepseek.com/news/news260424",
    "pricing": "https://api-docs.deepseek.com/quick_start/pricing",
    "license": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash",
    "benchmarks": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Flash pricing is standard (no promo). Cache-hit input reduced to $0.0028 (1/10) effective 2026-04-26. Open weights, MIT. Benchmarks are DeepSeek's own model-card figures (Instruct / Think Max mode)."
  },
  {
   "id": "deepseek-v4-pro",
   "name": "DeepSeek-V4-Pro",
   "provider": "DeepSeek",
   "apiModelId": "deepseek-v4-pro",
   "license": "MIT",
   "releaseDate": "2026-04-24",
   "params": {
    "total": "1.6T",
    "active": "49B"
   },
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 384000
   },
   "pricing": {
    "inputPerM": 0.435,
    "outputPerM": 0.87,
    "cacheHitInputPerM": 0.003625,
    "selfHost": true,
    "priceChangeNote": "These rates are PERMANENT, not a temporary low. A 75% promo ended 2026-05-31 15:59 UTC, after which DeepSeek officially set the price to 1/4 of the original — the SAME level as the promo. The price did NOT rise."
   },
   "benchmarks": {
    "SWE-bench Verified": 80.6,
    "SWE-bench Pro": 55.4,
    "GPQA Diamond": 90.1,
    "LiveCodeBench": 93.5,
    "Terminal-Bench 2.0": 67.9,
    "MMLU-Pro (Max)": 87.5,
    "MRCR 1M": 83.5,
    "Codeforces (rating)": 3206
   },
   "sources": {
    "release": "https://api-docs.deepseek.com/news/news260424",
    "pricing": "https://api-docs.deepseek.com/quick_start/pricing",
    "license": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro",
    "benchmarks": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro"
   },
   "verifiedDate": "2026-06-12",
   "notes": "RE-VERIFIED DIRECTLY on the official pricing page on 2026-06-03 (after the 2026-05-31 promo end): live price = $0.435 in / $0.87 out, cache-hit $0.003625 — the post-promo level held. Do NOT record a post-promo rise to $1.74/$3.48; that figure appears only in stale third-party search snippets, not on the live DeepSeek page. SWE-bench 80.6% is DeepSeek's own vendor-reported figure. Open weights, MIT."
  },
  {
   "id": "kimi-k2-6",
   "name": "Kimi K2.6",
   "provider": "Moonshot AI",
   "apiModelId": "kimi-k2.6",
   "license": "Modified MIT",
   "releaseDate": null,
   "params": {
    "total": "1T",
    "active": "32B",
    "experts": "384 (8 active/token)"
   },
   "context": {
    "windowTokens": 262144,
    "maxOutputTokens": null
   },
   "pricing": {
    "inputPerM": 0.95,
    "outputPerM": 4.0,
    "cacheHitInputPerM": 0.16,
    "selfHost": true
   },
   "benchmarks": {
    "SWE-bench Verified": 80.2,
    "SWE-bench Multilingual": 76.7,
    "SWE-bench Pro": 58.6,
    "Terminal-Bench 2.0": 66.7,
    "LiveCodeBench v6": 89.6,
    "AIME 2026": 96.4,
    "GPQA Diamond": 90.5,
    "Humanity's Last Exam (with tools)": 54.0,
    "OSWorld-Verified": 73.1
   },
   "sources": {
    "release": null,
    "pricing": "https://platform.kimi.ai/docs/pricing/chat-k26",
    "license": "https://huggingface.co/moonshotai/Kimi-K2.6/blob/main/LICENSE",
    "benchmarks": "https://huggingface.co/moonshotai/Kimi-K2.6"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Official release date not stated on any official Moonshot page (third-party sources say Apr 20, 2026), so releaseDate is null. Context 262,144 (256K) confirmed on both pricing page and model card. Input $0.95 is cache-miss; cache-hit $0.16. Model card body: 1T total / 32B active (the org listing page rounds to 1.1T). Benchmarks are Moonshot's own model-card figures."
  },
  {
   "id": "mistral-large-3",
   "name": "Mistral Large 3",
   "provider": "Mistral AI",
   "apiModelId": "mistral-large-2512",
   "license": "Apache-2.0",
   "releaseDate": "2025-12-02",
   "params": {
    "total": "675B",
    "active": "41B"
   },
   "context": {
    "windowTokens": 256000,
    "maxOutputTokens": null
   },
   "pricing": {
    "inputPerM": 0.5,
    "outputPerM": 1.5,
    "cacheHitInputPerM": null,
    "selfHost": true
   },
   "benchmarks": {},
   "sources": {
    "release": "https://docs.mistral.ai/models/mistral-large-3-25-12",
    "pricing": "https://docs.mistral.ai/models/mistral-large-3-25-12",
    "license": "https://mistral.ai/news/mistral-3/",
    "benchmarks": "https://mistral.ai/news/mistral-3/"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Open weights, Apache-2.0. Mistral's announcement gives only relative/leaderboard claims for Large 3, no discrete official per-benchmark scores (the ~85% AIME figure on the page belongs to a smaller reasoning variant, NOT Large 3) — so benchmarks is intentionally empty, not guessed. Cache-hit price not published."
  },
  {
   "id": "mistral-medium-3-5",
   "name": "Mistral Medium 3.5",
   "provider": "Mistral AI",
   "apiModelId": "mistral-medium-3-5",
   "license": "Modified MIT",
   "releaseDate": null,
   "params": {
    "total": "128B",
    "active": "128B",
    "type": "dense"
   },
   "context": {
    "windowTokens": 256000,
    "maxOutputTokens": null
   },
   "pricing": {
    "inputPerM": 1.5,
    "outputPerM": 7.5,
    "cacheHitInputPerM": null,
    "selfHost": true
   },
   "benchmarks": {},
   "sources": {
    "release": "https://mistral.ai/news/vibe-remote-agents-mistral-medium-3-5/",
    "pricing": "https://docs.mistral.ai/models/model-cards/mistral-medium-3-5-26-04",
    "license": "https://docs.mistral.ai/models/model-cards/mistral-medium-3-5-26-04",
    "context": "https://docs.mistral.ai/models/model-cards/mistral-medium-3-5-26-04"
   },
   "verifiedDate": "2026-06-03",
   "notes": "Verified 2026-06-03 against the official Mistral model card: Modified MIT license, open-weight (self-hostable on ~4 GPUs) AND offered as a hosted API, dense 128B, 256K context, $1.50/$7.50 per 1M. Official release date and max output tokens not stated on the card (null). No discrete official per-benchmark scores published — benchmarks intentionally empty."
  },
  {
   "id": "qwen-3-6-27b",
   "name": "Qwen3.6-27B (dense)",
   "provider": "Alibaba (Qwen)",
   "apiModelId": "Qwen/Qwen3.6-27B",
   "license": "Apache-2.0",
   "releaseDate": "2026-04-22",
   "params": {
    "total": "27B",
    "active": "27B",
    "type": "dense"
   },
   "context": {
    "windowTokens": 262144,
    "windowTokensExtended": 1010000,
    "maxOutputTokens": null
   },
   "pricing": {
    "selfHost": true,
    "inputPerM": null,
    "outputPerM": null
   },
   "benchmarks": {
    "SWE-bench Verified": 77.2,
    "SWE-bench Pro": 53.5,
    "Terminal-Bench 2.0": 59.3,
    "MMLU-Pro": 86.2,
    "GPQA Diamond": 87.8,
    "AIME 2026": 94.1,
    "MMMU": 82.9
   },
   "sources": {
    "release": "https://huggingface.co/Qwen/Qwen3.6-27B",
    "license": "https://huggingface.co/Qwen/Qwen3.6-27B",
    "benchmarks": "https://huggingface.co/Qwen/Qwen3.6-27B"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Open weight, Apache-2.0, self-host (no per-token list price). 262,144 native context, extensible ~1M via YaRN. Benchmarks from the official HF model card."
  },
  {
   "id": "qwen-3-6-35b-a3b",
   "name": "Qwen3.6-35B-A3B (MoE)",
   "provider": "Alibaba (Qwen)",
   "apiModelId": "Qwen/Qwen3.6-35B-A3B",
   "license": "Apache-2.0",
   "releaseDate": "2026-04-16",
   "params": {
    "total": "35B",
    "active": "3B",
    "experts": "256 (8 routed + 1 shared)"
   },
   "context": {
    "windowTokens": 262144,
    "windowTokensExtended": 1010000,
    "maxOutputTokens": null
   },
   "pricing": {
    "selfHost": true,
    "inputPerM": null,
    "outputPerM": null
   },
   "benchmarks": {
    "SWE-bench Verified": 73.4,
    "SWE-bench Multilingual": 67.2,
    "Terminal-Bench 2.0": 51.5,
    "MMLU-Pro": 85.2,
    "AIME 2026": 92.7,
    "GPQA Diamond": 86.0,
    "MMMU": 81.7
   },
   "sources": {
    "release": "https://huggingface.co/Qwen/Qwen3.6-35B-A3B",
    "license": "https://huggingface.co/Qwen/Qwen3.6-35B-A3B",
    "benchmarks": "https://huggingface.co/Qwen/Qwen3.6-35B-A3B"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Open weight, Apache-2.0, self-host. NOTE: a 'Qwen3.6-Plus' hosted/cloud flagship could NOT be confirmed on any official Alibaba/Qwen page (Model Studio still lists Qwen3.5-Plus); treat Qwen3.6-Plus pricing/specs as unverified. Only the two open-weight variants (27B, 35B-A3B) are officially confirmed."
  },
  {
   "id": "llama-4-scout",
   "name": "Llama 4 Scout",
   "provider": "Meta",
   "apiModelId": "meta-llama/Llama-4-Scout-17B-16E",
   "license": "Llama 4 Community License",
   "releaseDate": "2025-04-05",
   "params": {
    "total": "109B",
    "active": "17B",
    "experts": "16"
   },
   "context": {
    "windowTokens": 10000000,
    "maxOutputTokens": null
   },
   "pricing": {
    "selfHost": true,
    "inputPerM": null,
    "outputPerM": null
   },
   "benchmarks": {
    "MMLU-Pro (0-shot)": 74.3,
    "GPQA Diamond": 57.2,
    "MMMU": 73.4,
    "MathVista": 73.7,
    "LiveCodeBench": 32.8
   },
   "sources": {
    "release": "https://ai.meta.com/blog/llama-4-multimodal-intelligence/",
    "license": "https://www.llama.com/llama4/license/",
    "context": "https://www.llama.com/docs/model-cards-and-prompt-formats/llama4/",
    "benchmarks": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Open weight; $0 to self-host (Meta sells no API). Community License: >700M MAU needs a separate Meta license; not OSI-approved. 10M-token context. Benchmarks from Meta's official instruction-tuned model card."
  },
  {
   "id": "llama-4-maverick",
   "name": "Llama 4 Maverick",
   "provider": "Meta",
   "apiModelId": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
   "license": "Llama 4 Community License",
   "releaseDate": "2025-04-05",
   "params": {
    "total": "400B",
    "active": "17B",
    "experts": "128"
   },
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": null
   },
   "pricing": {
    "selfHost": true,
    "inputPerM": null,
    "outputPerM": null
   },
   "benchmarks": {
    "MMLU-Pro (0-shot)": 80.5,
    "GPQA Diamond": 69.8,
    "LiveCodeBench": 43.4,
    "MGSM": 92.3
   },
   "sources": {
    "release": "https://ai.meta.com/blog/llama-4-multimodal-intelligence/",
    "license": "https://www.llama.com/llama4/license/",
    "context": "https://www.llama.com/docs/model-cards-and-prompt-formats/llama4/",
    "benchmarks": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct"
   },
   "verifiedDate": "2026-05-31",
   "notes": "Open weight; $0 to self-host. 1M-token context, 128 experts. Llama 4 Behemoth (288B active / ~2T total) was only ever previewed as 'still training' and was never released — do not list specs for it as a usable model."
  },
  {
   "id": "phi-4",
   "name": "Phi-4",
   "provider": "Microsoft",
   "apiModelId": "microsoft/phi-4",
   "license": "MIT",
   "releaseDate": "2024-12-12",
   "params": {
    "total": "14B",
    "active": "14B",
    "type": "dense"
   },
   "context": {
    "windowTokens": 16000,
    "maxOutputTokens": null
   },
   "pricing": {
    "selfHost": true,
    "inputPerM": null,
    "outputPerM": null
   },
   "benchmarks": {
    "GPQA Diamond": 56.1,
    "MMLU": 84.8,
    "HumanEval": 82.6,
    "MATH": 80.4
   },
   "sources": {
    "release": "https://huggingface.co/microsoft/phi-4",
    "license": "https://huggingface.co/microsoft/phi-4",
    "context": "https://huggingface.co/microsoft/phi-4",
    "benchmarks": "https://www.microsoft.com/en-us/research/publication/phi-4-technical-report/"
   },
   "verifiedDate": "2026-06-03",
   "notes": "Verified 2026-06-03: MIT-licensed, 14B dense, 16K context, self-host only (no Microsoft per-token API; available on Azure AI Foundry + Hugging Face). Released 2024-12-12. Benchmarks from the Phi-4 technical report / model card."
  },
  {
   "id": "claude-fable-5",
   "name": "Claude Fable 5",
   "provider": "Anthropic",
   "apiModelId": "claude-fable-5",
   "license": "proprietary",
   "releaseDate": "2026-06-09",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 128000
   },
   "pricing": {
    "inputPerM": 10.0,
    "outputPerM": 50.0,
    "cachedInputPerM": 1.0,
    "note": "Prompt caching = 90% input discount per the launch announcement. Included free on Pro/Max/Team/seat-based Enterprise June 9-22, 2026; usage credits from June 23."
   },
   "benchmarks": {
    "SWE-bench Pro": 80.3,
    "SWE-bench Verified": null,
    "GPQA Diamond": null,
    "note": "Anthropic's launch table reports the higher of Mythos 5 / Fable 5 per row (within 1-3 points of each other). Starred rows (cybersecurity, biology, some reasoning) reflect Mythos 5; real-world Fable 5 results there are closer to Opus 4.8 because of the safety classifiers. SWE-bench Verified and GPQA Diamond were not published at launch."
   },
   "sources": {
    "release": "https://www.anthropic.com/news/claude-fable-5-mythos-5",
    "pricing": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "context": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "benchmarks": "https://www.anthropic.com/news/claude-fable-5-mythos-5"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Mythos-class model, generally available June 9, 2026 on the Claude API, AWS, Bedrock, Vertex AI, and Microsoft Foundry. Same underlying model as Claude Mythos 5 (claude-mythos-5, Glasswing-only) but with safety classifiers: offensive-cyber, most biology/chemistry, and distillation requests fall back to Claude Opus 4.8. Anthropic says more than 95% of Fable sessions involve no fallback. Adaptive thinking always on; no extended-thinking toggle. Uses the Opus 4.7 tokenizer (~30% more tokens than pre-4.7 models for the same text). 30-day data retention on all Mythos-class traffic. Availability update, June 12, 2026: Anthropic suspended Fable 5 (and Mythos 5) for all customers to comply with a U.S. Commerce Department export-control directive barring Mythos-class use by any foreign national; AWS revoked Bedrock access the same day. All other Anthropic models are unaffected; Anthropic disputes the recall and says it is working to restore access. Listed price, context, and benchmark figures are unchanged. Sources: AWS News Blog (Claude Fable 5 on AWS) June 12 update; June 12-13 reporting."
  },
  {
   "id": "claude-mythos-5",
   "name": "Claude Mythos 5",
   "provider": "Anthropic",
   "apiModelId": "claude-mythos-5",
   "license": "proprietary",
   "releaseDate": "2026-06-09",
   "status": "NOT generally available — limited availability to approved Project Glasswing customers only.",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 128000
   },
   "pricing": {
    "inputPerM": 10.0,
    "outputPerM": 50.0,
    "cachedInputPerM": 1.0,
    "note": "Same listed pricing as Fable 5 (incl. $1 cache-hit input); access is restricted to approved Glasswing organizations."
   },
   "benchmarks": {
    "note": "Shares the launch benchmark table with Fable 5 (scores shown are the higher of the two, within 1-3 points). Anthropic calls its cybersecurity capabilities the strongest of any model in the world."
   },
   "sources": {
    "release": "https://www.anthropic.com/news/claude-fable-5-mythos-5",
    "pricing": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "context": "https://platform.claude.com/docs/en/about-claude/models/overview"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Successor to Claude Mythos Preview inside Project Glasswing (deployed in collaboration with the US government). Same underlying model as Fable 5 with safeguards lifted in approved areas. No self-serve access. Availability update, June 12, 2026: suspended for all customers alongside Fable 5 under the same U.S. Commerce Department export-control directive (barring Mythos-class use by any foreign national); Anthropic says it is working to restore access. Listed figures are unchanged."
  },
  {
   "id": "gpt-5-4",
   "name": "GPT-5.4",
   "provider": "OpenAI",
   "apiModelId": "gpt-5.4",
   "license": "proprietary",
   "releaseDate": "2026-03-05",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 128000,
    "note": "OpenAI markets context 'as large as 1 million tokens' on the API. Inputs beyond the standard 272K window carry a long-context surcharge on the official pricing page."
   },
   "pricing": {
    "inputPerM": 2.5,
    "outputPerM": 15.0,
    "cachedInputPerM": 0.25,
    "note": "Long-context surcharge above 272K input per the official pricing page."
   },
   "benchmarks": {
    "OSWorld-Verified": 75.0,
    "SWE-bench Verified": null,
    "note": "OSWorld-Verified 75% is from OpenAI's launch material (vs 47.3% for GPT-5.2; human baseline 72.4%). No official SWE-bench Verified figure found for GPT-5.4."
   },
   "sources": {
    "release": "https://openai.com/index/introducing-gpt-5-4-mini-and-nano/",
    "pricing": "https://openai.com/api/pricing/",
    "context": "https://openai.com/api/pricing/",
    "benchmarks": "https://help.openai.com/en/articles/9624314-model-release-notes"
   },
   "verifiedDate": "2026-06-10",
   "notes": "Released March 5, 2026 as GPT-5.4 Thinking + GPT-5.4 Pro ('most capable and efficient frontier model for professional work'); GPT-5.4 mini and nano followed March 17 (mini available to free tier, nano API-only). Built-in computer use; tuned for finance workflows (launched alongside ChatGPT for Excel, which it powered at launch). benchr removed an earlier gpt-5-4 entry on June 1, 2026 as 'unverified' — that removal was wrong; the model is real and was re-added after this June 10 verification. Superseded as OpenAI's flagship by GPT-5.5 (April 23, 2026)."
  },
  {
   "id": "claude-opus-4-6",
   "name": "Claude Opus 4.6",
   "provider": "Anthropic",
   "apiModelId": "claude-opus-4-6",
   "license": "proprietary",
   "releaseDate": "2026-02-05",
   "status": "Active (official lifecycle state on the Anthropic deprecations table, June 12, 2026); superseded as the newest Opus by 4.7 (April 2026) and 4.8 (May 2026) but fully supported.",
   "context": {
    "windowTokens": 1000000,
    "maxOutputTokens": 128000
   },
   "pricing": {
    "inputPerM": 5.0,
    "outputPerM": 25.0,
    "fastModeInputPerM": 30.0,
    "fastModeOutputPerM": 150.0,
    "batchInputPerM": 2.5,
    "batchOutputPerM": 12.5,
    "cachedInputPerM": 0.5
   },
   "tentativeRetirementFloor": "2027-02-05",
   "benchmarks": {
    "note": "Launch benchmarks not re-verified for this entry; cite from the official announcement if needed."
   },
   "sources": {
    "release": "https://www.anthropic.com/news/claude-opus-4-6",
    "pricing": "https://platform.claude.com/docs/en/about-claude/models/overview",
    "context": "https://platform.claude.com/docs/en/about-claude/models/overview"
   },
   "verifiedDate": "2026-06-12",
   "notes": "Released February 5, 2026. First Opus with the 1M-token context window (beta at launch); extended thinking AND adaptive thinking. The official deprecations table (June 12, 2026) lists it ACTIVE at $5/$25, 1M context, 128K max output, with a tentative retirement floor of February 5, 2027; fast mode ($30/$150, same rate as Opus 4.7) and $0.50 cache-hit input are on the official pricing page. An earlier benchr audit wrongly flagged 'Opus 4.6' as a non-existent model — it is real; the Opus lane runs 4.6 (Feb) → 4.7 (Apr) → 4.8 (May)."
  },
  {
   "id": "grok-4-20",
   "name": "Grok 4.20",
   "provider": "xAI",
   "apiModelId": "grok-4.20",
   "license": "proprietary",
   "releaseDate": "2026-03-10",
   "status": "Superseded as xAI's flagship by Grok 4.3 (April 2026); still listed in the xAI docs.",
   "context": {
    "windowTokens": 2000000
   },
   "pricing": {
    "inputPerM": 2.0,
    "outputPerM": 6.0
   },
   "benchmarks": {
    "note": "Not re-verified for this entry."
   },
   "sources": {
    "release": "https://docs.x.ai/developers/models/grok-4.20",
    "pricing": "https://docs.x.ai/developers/models/grok-4.20",
    "context": "https://docs.x.ai/developers/models/grok-4.20"
   },
   "verifiedDate": "2026-06-10",
   "notes": "Public beta February 17, 2026; second beta March 3; API release March 10 at $2/$6 with a 2M-token context window; exited beta mid-March with Auto/Fast/Expert/Heavy modes and a multi-agent variant (Grok 4.20 Multi-Agent). Predecessor to Grok 4.3 — benchr's Grok 4.3 review correctly cites it as the prior release."
  },
  {
   "id": "qwen-3-5-397b-a17b",
   "name": "Qwen3.5-397B-A17B",
   "provider": "Alibaba (Qwen team)",
   "apiModelId": null,
   "license": null,
   "releaseDate": "2026-02-16",
   "status": "Previous generation — superseded by the Qwen3.6 series (April 2026).",
   "context": {
    "windowTokens": 262144,
    "windowTokensExtended": 1010000
   },
   "pricing": {
    "inputPerM": null,
    "outputPerM": null,
    "note": "Hosted-API pricing not confirmed on an official Alibaba page; third-party claims of $0.10/M input are unverified."
   },
   "benchmarks": {
    "note": "Not re-verified for this entry."
   },
   "sources": {
    "release": "https://github.com/QwenLM/Qwen3.6",
    "context": "https://huggingface.co/Qwen"
   },
   "verifiedDate": "2026-06-10",
   "notes": "Qwen3.5 flagship: 397B total / 17B active MoE (512 experts, 10 routed + 1 shared), hybrid Gated Delta Network + MoE architecture, native 262,144-token context extensible to ~1,010,000. The Qwen3.5 family spans nine sizes (Small 0.8B-9B, Medium 27B/35B-A3B/122B-A10B, flagship 397B-A17B); most open-weight under Apache 2.0 — the flagship's exact license was not re-verified, so it is left null. Qwen3.6 (April 2026) is the current series and what benchr's tools track."
  }
 ]
}