{
  "$schema_version": 1,
  "generated_at": "2026-06-16",
  "notes": "Refreshed 2026-06-16: added Opus 4.8, Fable 5, Qwen 3.7 Max/Plus, Kimi K2.7-Code, GLM-5.2, Gemini 3.5 Pro/Flash, Grok Build 0.1, FLUX.1 Tools and FLUX.2 [klein].",
  "vendors": {
    "claude": {
      "display_name": "Anthropic",
      "product_line": "Claude",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://www.anthropic.com/news",
      "docs_url": "https://docs.claude.com",
      "models": [
        {
          "id": "claude-fable-5",
          "name": "Claude Fable 5",
          "tier": "mythos",
          "released": "2026-06-09",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 9.0,
            "output_per_m_usd": 45.0
          },
          "best_for": "New Mythos-class tier — Anthropic's most capable generally-available model. Exceeds anything previously shipped to the public.",
          "source_url": "https://www.anthropic.com/news",
          "source_date": "2026-06-09"
        },
        {
          "id": "claude-mythos-5",
          "name": "Claude Mythos 5",
          "tier": "restricted",
          "released": "2026-06-09",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "trusted-access",
          "pricing": null,
          "best_for": "Restricted to approved programs and trusted-access partners. Most capable Anthropic model overall.",
          "source_url": "https://www.anthropic.com/news",
          "source_date": "2026-06-09"
        },
        {
          "id": "claude-opus-4.8",
          "name": "Claude Opus 4.8",
          "tier": "flagship",
          "released": "2026-05-28",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 5.0,
            "output_per_m_usd": 25.0
          },
          "best_for": "Hard reasoning + careful coding. ~4× less likely than Opus 4.7 to let code flaws slip past. Ships with Dynamic Workflows preview (parallel subagents) in Claude Code.",
          "source_url": "https://www.anthropic.com/news",
          "source_date": "2026-05-28"
        },
        {
          "id": "claude-opus-4.7",
          "name": "Claude Opus 4.7",
          "tier": "previous",
          "released": "2026-04-16",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "previous",
          "pricing": {
            "input_per_m_usd": 5.0,
            "output_per_m_usd": 25.0
          },
          "best_for": "Previous flagship — superseded by Opus 4.8 on 2026-05-28. Still available.",
          "source_url": "https://www.anthropic.com/news",
          "source_date": "2026-04-16"
        },
        {
          "id": "claude-sonnet-4.6",
          "name": "Claude Sonnet 4.6",
          "tier": "mid",
          "released": "2026-03-12",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 1.0,
            "output_per_m_usd": 5.0
          },
          "best_for": "Mid-tier workhorse. Best price/quality for most production work.",
          "source_url": "https://www.anthropic.com/news",
          "source_date": "2026-03-12"
        },
        {
          "id": "claude-haiku-4.5",
          "name": "Claude Haiku 4.5",
          "tier": "cheap",
          "released": "2026-02-18",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 0.25,
            "output_per_m_usd": 1.25
          },
          "best_for": "Fast / cheap tier for high-volume classification, extraction, simple chat.",
          "source_url": "https://www.anthropic.com/news",
          "source_date": "2026-02-18"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Anthropic's Claude family — Fable 5 (most capable generally-available tier), Opus 4.8 (flagship), Sonnet 4.6 (mid-tier workhorse), Haiku 4.5 (fast/cheap tier). Trained with Constitutional AI plus RLHF; tuned for nuanced reasoning, careful writing, and safety.",
          "training": "Pretrained on a curated mix of public, licensed, and synthetic data, then aligned with Constitutional AI — a small set of guiding principles the model uses to critique and revise its own responses — plus RLHF refining against human preferences. Opus 4.7 adds an xhigh effort dial that allocates more internal reasoning per response.",
          "strengths": [
            "Ambiguous, judgment-heavy reasoning and open-ended strategy",
            "Long-form writing with a consistent voice",
            "Code review and careful debugging (Claude Code stack)",
            "Following nuanced instructions precisely",
            "Strong vision (98.5% on Anthropic's visual-acuity benchmark for Opus 4.7)",
            "MCP-native — cleanest agent tooling story"
          ],
          "weaknesses": [
            "Smaller context window (200K) than 1M+ competitors",
            "Higher per-token price than peers ($5 / $25 on Opus)",
            "No native image / video / music generation",
            "Tokenizer change in 4.7 increases tokens 1.0-1.35x over 4.6"
          ],
          "prompt_style": {
            "structure": "Wrap inputs in XML-style tags — Claude was trained to attend to structured markup. Use <task>, <context>, <input>, <output_format>, <thinking> to clearly separate parts of the prompt.",
            "voice": "Direct and explicit. State the role, goal, audience, and exact output format up front. Place the question AT THE END after any long document.",
            "advanced": [
              "Prefill the assistant turn with the first few characters to lock format (e.g. starting with '{' to force JSON)",
              "Use <thinking> tags or 'think step by step' to elicit chain-of-thought, especially on Opus 4.7 at xhigh effort",
              "Few-shot with 2-3 ranked examples beats describing the desired output abstractly",
              "Use 'You are a {role}' system messages, but keep them short — 1-3 lines beats 30"
            ]
          },
          "optimal_keywords": [
            "<thinking>...</thinking>",
            "Think step by step",
            "Consider the trade-offs",
            "Be specific and concrete",
            "<task>",
            "<input>",
            "<output_format>",
            "Role:",
            "Goal:",
            "Audience:",
            "Constraints:",
            "Format the answer as"
          ],
          "patterns": [
            {
              "name": "XML structure",
              "example": "<task>Review this code for bugs</task>\n<code>...</code>\n<output_format>file:line — severity — fix</output_format>"
            },
            {
              "name": "Role + Goal + Format header",
              "example": "Role: Senior backend reviewer.\nGoal: Find the highest-impact bug.\nFormat: One bullet per issue with file:line — severity — fix."
            },
            {
              "name": "Prefill enforcement",
              "example": "User: ...output as JSON.\nAssistant: {\n  \"issues\": ["
            }
          ],
          "anti_patterns": [
            "Burying instructions mid-paragraph instead of using structure",
            "Asking for 'comprehensive' analysis without length / section bounds",
            "Bundling 5 requests in one prompt",
            "Over-flattering the model ('you are the world's leading expert') — burns tokens, doesn't help"
          ],
          "specific_tips": [
            "Use Sonnet for coding agents, Opus for ambiguous strategy, Haiku for triage/routing",
            "Set effort=xhigh on Opus 4.7 for genuinely hard reasoning",
            "Prefill the assistant's first few characters to enforce output format",
            "For long inputs, place the QUESTION at the END after the document — Claude weights end-of-prompt heavily",
            "Use prompt caching for stable long prefixes (huge cost savings)",
            "MCP servers are first-class — use them for tool access rather than rolling your own",
            "If a response drifts, push back directly: 'You drifted — restate my ask in one sentence then answer it.'"
          ]
        },
        "plain": {
          "what_it_is": "Claude is Anthropic's AI assistant — like a thoughtful colleague you can chat with. There are three sizes: Opus (the genius, slower), Sonnet (the everyday workhorse), and Haiku (the speedster). It's known for being careful, honest, and good at writing.",
          "training": "Anthropic taught Claude using a huge library of text, then taught it to follow a short list of guiding principles — kind of like a code of ethics — so it can check its own answers and revise them. Real humans also rated its responses to help it learn what good and bad answers look like.",
          "strengths": [
            "Tough, judgment-heavy questions where there's no single right answer",
            "Writing that actually sounds like a real person — same voice all the way through",
            "Reading code carefully and explaining what's wrong (Claude Code)",
            "Following multi-step instructions without losing track",
            "Looking at images and understanding what's in them",
            "Plugging into tools and apps cleanly (MCP standard)"
          ],
          "weaknesses": [
            "Can only hold about 200,000 tokens (roughly 150,000 words) in its head at once — less than some rivals",
            "Costs more per word than most competitors",
            "Doesn't make pictures, videos, or music",
            "Sometimes refuses borderline-edgy requests other models would handle"
          ],
          "prompt_style": {
            "structure": "Use tags like <task>, <context>, <input> to label the parts of your request — Claude was trained to spot them. Think of it like writing labeled headers on a memo.",
            "voice": "Be direct. Say who you are, what you want, who it's for, and what the answer should look like. If you're pasting in a long document, put your question at the very end.",
            "advanced": [
              "Ask Claude to 'think step by step' for tricky problems — Opus has a special 'high effort' mode that thinks longer",
              "Have Claude critique its own first draft before answering",
              "Give 2-3 examples of the kind of answer you want, then ask for one more",
              "If you're using Claude in an app, you can set its personality once at the start"
            ]
          },
          "optimal_keywords": [
            "think step by step",
            "be precise",
            "before answering, consider",
            "draft, then critique, then revise",
            "format as",
            "constraints:",
            "avoid:",
            "audience:",
            "tone:",
            "<task>",
            "<context>",
            "if uncertain, say so"
          ],
          "patterns": [
            {
              "name": "Role + Task + Constraints",
              "example": "You are a careful editor.\nTask: rewrite this paragraph for 8th-grade readers.\nKeep it under 80 words. Don't change the facts."
            },
            {
              "name": "Big document with the question at the end",
              "example": "<context>\n[paste your 5,000-word document here]\n</context>\n\nWhat are the three biggest risks mentioned above?"
            },
            {
              "name": "Draft, critique, revise",
              "example": "Step 1: write a first draft.\nStep 2: list three things wrong with it.\nStep 3: rewrite to fix them."
            }
          ],
          "anti_patterns": [
            "Putting your question first and a giant document underneath — flip it",
            "Being vague about who the answer is for or what format you want",
            "Telling it to 'be creative' without giving any boundaries",
            "Asking it to lie about being an AI — it won't, and you'll waste a turn"
          ],
          "specific_tips": [
            "Long context goes first, your question last",
            "Use Claude Code (the terminal tool) for serious code work",
            "Sonnet 4.6 is usually the right call — only reach for Opus on the really hard stuff",
            "When you change anything in the system instructions, the whole cache resets — group changes together",
            "If you want consistent formatting, show one example first"
          ]
        }
      }
    },
    "openai": {
      "display_name": "OpenAI",
      "product_line": "GPT",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://help.openai.com/en/articles/9624314-model-release-notes",
      "docs_url": "https://platform.openai.com/docs",
      "models": [
        {
          "id": "gpt-5.5-instant",
          "name": "GPT-5.5 Instant",
          "tier": "default",
          "released": "2026-05-05",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 1.5,
            "output_per_m_usd": 8.0
          },
          "best_for": "ChatGPT's default model since 2026-05-05. Faster, more accurate, fewer hallucinations vs 5.4 Instant. Improved personalization controls.",
          "source_url": "https://techcrunch.com/2026/05/05/openai-releases-gpt-5-5-instant-a-new-default-model-for-chatgpt/",
          "source_date": "2026-05-05"
        },
        {
          "id": "gpt-5.5",
          "name": "GPT-5.5 / 5.5 Pro",
          "tier": "flagship",
          "released": "2026-04-23",
          "context": 400000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 5.0,
            "output_per_m_usd": 25.0
          },
          "best_for": "Smartest GPT — built for complex coding, research, and data analysis across tools.",
          "source_url": "https://openai.com/research/index/release/",
          "source_date": "2026-04-23"
        },
        {
          "id": "gpt-5.4",
          "name": "GPT-5.4",
          "tier": "mid",
          "released": "2026-02-20",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 1.5,
            "output_per_m_usd": 8.0
          },
          "best_for": "Mid-tier general workhorse for production apps.",
          "source_url": "https://help.openai.com/en/articles/9624314-model-release-notes",
          "source_date": "2026-02-20"
        },
        {
          "id": "gpt-5.4-pro",
          "name": "GPT-5.4 Pro",
          "tier": "high",
          "released": "2026-03-08",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 3.0,
            "output_per_m_usd": 15.0
          },
          "best_for": "Quality-leaning tier just below the flagship.",
          "source_url": "https://help.openai.com/en/articles/9624314-model-release-notes",
          "source_date": "2026-03-08"
        },
        {
          "id": "gpt-5-mini",
          "name": "GPT-5 Mini",
          "tier": "cheap",
          "released": "2025-12-10",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 0.25,
            "output_per_m_usd": 1.25
          },
          "best_for": "Cheap tier for high-volume work.",
          "source_url": "https://help.openai.com/en/articles/9624314-model-release-notes",
          "source_date": "2025-12-10"
        },
        {
          "id": "o3",
          "name": "o3",
          "tier": "reasoning",
          "released": "2025-12-20",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 10.0,
            "output_per_m_usd": 40.0
          },
          "best_for": "Deep reasoning — pauses to think before answering. Best for math, hard logic, science.",
          "source_url": "https://help.openai.com/en/articles/9624314-model-release-notes",
          "source_date": "2025-12-20"
        },
        {
          "id": "gpt-realtime-2",
          "name": "GPT Realtime Voice (2026-05)",
          "tier": "voice",
          "released": "2026-05-07",
          "context": 128000,
          "max_output": null,
          "modalities": [
            "text",
            "audio"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Realtime voice — reason, translate, transcribe speech in a single low-latency model. API only.",
          "source_url": "https://openai.com/research/index/release/",
          "source_date": "2026-05-07"
        },
        {
          "id": "gpt-rosalind",
          "name": "GPT-Rosalind (Biodefense)",
          "tier": "restricted",
          "released": "2026-05-29",
          "context": null,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": false,
          "status": "trusted-access",
          "pricing": null,
          "best_for": "Restricted to vetted developers and U.S. government partners — biodefense, public health, pandemic preparedness. June 3 update added life-sciences depth (genomics, med chem).",
          "source_url": "https://openai.com/research/index/release/",
          "source_date": "2026-06-03"
        },
        {
          "id": "gpt-image-2",
          "name": "gpt-image-2",
          "tier": "image",
          "released": "2026-03-20",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": null,
            "output_per_m_usd": null
          },
          "best_for": "Image generation and editing via Images API.",
          "source_url": "https://openai.com/research/index/release/",
          "source_date": "2026-03-20"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "OpenAI's GPT family — GPT-5.5 / 5.5 Pro (flagship), 5.4 (mid-tier with native Computer Use), GPT-5 Mini (volume), o3 (reasoning specialist), gpt-image-2 (images). Largest product surface in the industry; de facto API shape.",
          "training": "Pretrained on a broad web-scale corpus, aligned via supervised fine-tuning, RLHF, and rule-based reward models. The o-series and 5.4 family expose graduated 'reasoning_effort' dials (low/medium/high/xhigh) that trade latency for additional internal reasoning before the visible answer.",
          "strengths": [
            "Largest product surface (chat, Realtime voice, image gen, Computer Use, Codex)",
            "Best Computer Use today — GPT-5.4 at 75% OSWorld-Verified",
            "Most mature tools / function-calling ecosystem",
            "Codex for parallel cloud coding (open 6 PRs overnight)",
            "Structured Outputs (json_schema) — strict and reliable",
            "Massive ChatGPT consumer brand reach"
          ],
          "weaknesses": [
            "GPT-5.5 API still not GA at writing — ChatGPT-only lag",
            "Higher prices than Gemini/Grok at frontier",
            "Azure-heavy cloud distribution",
            "Sora 2 exiting the video category"
          ],
          "prompt_style": {
            "structure": "Use the messages array properly — system for persona/rules, user for the actual ask, assistant for exemplars. Heavy use of system is rewarded. Mark sections with ### Headers, fenced code blocks, or XML.",
            "voice": "Explicit, structured, with clear delimiters. The model attends to format markers. For tools, declare the function schema strictly — strict mode catches errors at the API.",
            "advanced": [
              "Tune reasoning_effort=high (or xhigh on Pro) for hard reasoning on o-series and 5.4 family",
              "Use Structured Outputs (response_format: {type: 'json_schema', strict: true})",
              "Function calling > parsing JSON from prose — declare tools explicitly",
              "Batch API for async work is 50% off; use it for evals and bulk transforms"
            ]
          },
          "optimal_keywords": [
            "Let's think step by step",
            "Reason step by step before answering",
            "Output JSON conforming to the schema:",
            "system:",
            "tools:",
            "reasoning_effort: 'high'",
            "tool_choice: 'auto'",
            "response_format: {type:'json_schema'}",
            "### Instructions ###"
          ],
          "patterns": [
            {
              "name": "Structured Outputs",
              "example": "response_format: {\n  type: 'json_schema',\n  json_schema: { name: 'Result', strict: true, schema: {...} }\n}"
            },
            {
              "name": "Effort dial",
              "example": "model: 'o3', reasoning_effort: 'high'  // for hard reasoning"
            },
            {
              "name": "Tool use loop",
              "example": "tools: [{type:'function', function:{name:'read_file',...}}]\ntool_choice: 'auto'"
            }
          ],
          "anti_patterns": [
            "Asking for JSON in prose instead of using response_format: json_schema",
            "Ignoring the effort dial on hard tasks (you pay the same; get worse answers)",
            "Stuffing rules in the user message when system is the right place",
            "Not using Batch API for offline evals"
          ],
          "specific_tips": [
            "Use Structured Outputs with strict: true — it's the most reliable JSON in any API",
            "Crank reasoning_effort for o3 and 5.4 on hard reasoning",
            "GPT-5.4 for any Computer Use task — 75% OSWorld beats human baseline",
            "Codex (cloud) for parallel PR-style coding work",
            "Batch API is 50% off — use it for evals, classification, bulk transforms",
            "Whisper is open-weight — self-host audio transcription cheaply",
            "Use system prompt for persona/rules; user for the actual task"
          ]
        },
        "plain": {
          "what_it_is": "OpenAI makes the GPT family — the AI that powers ChatGPT. The main models are GPT-5.5 (smart all-purpose), the o-series (deep thinkers that pause to reason), and smaller cheaper ones. Plus there's gpt-image-2 for images and a realtime voice model, while Sora, the video tool, is exiting.",
          "training": "OpenAI showed these models a massive amount of text from the internet and books, then had thousands of real people rate the answers to teach them what 'helpful' looks like. The o-series models go further — they're rewarded for working out tough problems by thinking longer before answering.",
          "strengths": [
            "Math, coding, and science where there's a right answer to check",
            "Working through hard problems by 'thinking out loud' (o-series)",
            "Calling tools and APIs reliably for agents",
            "Voice and video understanding through one model",
            "Huge developer ecosystem — most tutorials online assume GPT"
          ],
          "weaknesses": [
            "Can rush to answer when the question would benefit from a pause",
            "Less consistent writing voice than Claude on long pieces",
            "GPT-5 is expensive at heavy usage",
            "More likely to make up confident-sounding facts than Claude"
          ],
          "prompt_style": {
            "structure": "Markdown works great — use # headers and bullets. Or use simple labels like Role:, Goal:, Constraints:, Output:.",
            "voice": "Be specific about what good and bad look like. For coding, paste the error message. For writing, name the audience and tone.",
            "advanced": [
              "For tough problems, switch to an o-series model and let it think",
              "Use function calling / structured output for anything you'll process by code",
              "For ChatGPT, custom instructions let you set your role and preferences once",
              "If the answer matters, ask GPT to show its reasoning"
            ]
          },
          "optimal_keywords": [
            "think carefully",
            "show your reasoning",
            "step by step",
            "verify before answering",
            "output as JSON",
            "format:",
            "constraints:",
            "good example:",
            "bad example:",
            "audience:",
            "tone:"
          ],
          "patterns": [
            {
              "name": "Hard problem with deep thinking",
              "example": "Use o3 model.\nProblem: [your problem]\nThink through it carefully, then give your answer."
            },
            {
              "name": "Code task with examples",
              "example": "Task: write a function that...\nInput example: ...\nExpected output: ...\nEdge cases to handle: ..."
            },
            {
              "name": "Structured output for an app",
              "example": "Reply ONLY with JSON in this shape:\n{ \"summary\": string, \"key_points\": string[] }"
            }
          ],
          "anti_patterns": [
            "Using a fast model for a question that needs deep thought",
            "Asking it to be creative with no examples — you'll get generic output",
            "Trusting numbers without asking it to double-check",
            "Mixing two tasks in one message — split them"
          ],
          "specific_tips": [
            "If you need reliability, use the o-series and accept slower answers",
            "Always show examples for formatting — GPT mirrors what you show it",
            "Use system messages to lock in the role and tone",
            "Turn on web search when current info matters; turn it off when it doesn't"
          ]
        }
      }
    },
    "google": {
      "display_name": "Google",
      "product_line": "Gemini / Imagen / Veo / Gemma",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://ai.google.dev/gemini-api/docs/changelog",
      "docs_url": "https://ai.google.dev",
      "models": [
        {
          "id": "gemini-3.5-pro",
          "name": "Gemini 3.5 Pro",
          "tier": "flagship",
          "released": "2026-05-19",
          "context": 2000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision",
            "audio",
            "video"
          ],
          "open_weights": false,
          "status": "preview",
          "pricing": null,
          "best_for": "Frontier multimodal flagship announced at Google I/O 2026. GA expected late June. Limited preview as of 2026-06-16.",
          "source_url": "https://ai.google.dev/gemini-api/docs/changelog",
          "source_date": "2026-05-19"
        },
        {
          "id": "gemini-3.5-flash",
          "name": "Gemini 3.5 Flash",
          "tier": "fast",
          "released": "2026-06-08",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision",
            "audio"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 0.1,
            "output_per_m_usd": 0.4
          },
          "best_for": "Generally available in Gemini Enterprise (default since 2026-06-08). Frontier-intelligence Flash tier — fast/cheap/multimodal.",
          "source_url": "https://docs.cloud.google.com/gemini/enterprise/docs/release-notes",
          "source_date": "2026-06-08"
        },
        {
          "id": "gemini-3.1-pro",
          "name": "Gemini 3.1 Pro",
          "tier": "previous",
          "released": "2026-03-25",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision",
            "audio",
            "video"
          ],
          "open_weights": false,
          "status": "previous",
          "pricing": {
            "input_per_m_usd": 1.25,
            "output_per_m_usd": 10.0
          },
          "best_for": "Previous flagship — superseded by 3.5 Pro preview. Still GA.",
          "source_url": "https://ai.google.dev/gemini-api/docs/changelog",
          "source_date": "2026-03-25"
        },
        {
          "id": "gemini-3-flash",
          "name": "Gemini 3 Flash",
          "tier": "previous",
          "released": "2026-02-14",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision",
            "audio"
          ],
          "open_weights": false,
          "status": "previous",
          "pricing": {
            "input_per_m_usd": 0.1,
            "output_per_m_usd": 0.4
          },
          "best_for": "Previous Flash — superseded by 3.5 Flash on 2026-06-08.",
          "source_url": "https://ai.google.dev/gemini-api/docs/changelog",
          "source_date": "2026-02-14"
        },
        {
          "id": "gemini-3.1-flash-lite",
          "name": "Gemini 3.1 Flash-Lite",
          "tier": "cheap",
          "released": "2026-03-25",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 0.05,
            "output_per_m_usd": 0.2
          },
          "best_for": "Cheapest Gemini — high-volume classification/extraction.",
          "source_url": "https://ai.google.dev/gemini-api/docs/changelog",
          "source_date": "2026-03-25"
        },
        {
          "id": "veo-3",
          "name": "Veo 3",
          "tier": "video",
          "released": "2026-04-10",
          "context": null,
          "max_output": null,
          "modalities": [
            "video",
            "audio"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Cinematic video with native audio/dialogue.",
          "source_url": "https://ai.google.dev/gemini-api/docs/changelog",
          "source_date": "2026-04-10"
        },
        {
          "id": "imagen-4-ultra",
          "name": "Imagen 4 Ultra",
          "tier": "image",
          "released": "2026-03-25",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Top-tier text rendering inside images, photoreal.",
          "source_url": "https://ai.google.dev/gemini-api/docs/changelog",
          "source_date": "2026-03-25"
        },
        {
          "id": "gemma-4",
          "name": "Gemma 4",
          "tier": "open",
          "released": "2026-02-28",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "current",
          "pricing": null,
          "best_for": "Open-weights Gemma family for self-hosted deployment.",
          "source_url": "https://ai.google.dev/gemma",
          "source_date": "2026-02-28"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Google's Gemini family — 3.1 Pro (flagship, 2M context, Deep Think), 3 Flash & 3.1 Flash-Lite (cheap tiers), Imagen 4 / Veo 3 / Lyria (media), Gemma 4 (open weights). Multimodal-native from day one.",
          "training": "Pretrained jointly across text, images, video, and audio in a single architecture — not bolt-on multimodality. Deep Think mode adds significant test-time compute for hardest problems. Aligned via RLHF and Constitutional-style methods. Gemma 4 is a maintained open-weight family in the same lineage.",
          "strengths": [
            "Largest context window in the market (2M on 3.1 Pro)",
            "Strongest native multimodality (drop in a video, it just reads)",
            "Best abstract reasoning benchmarks (77.1% ARC-AGI-2, 94.3% GPQA Diamond at release)",
            "Most generous free dev tier (AI Studio)",
            "Best in-image text rendering (Imagen 4)",
            "Veo 3 dominates video gen with synchronized audio",
            "Only first-party open-weight family with frontier-adjacent quality (Gemma 4)"
          ],
          "weaknesses": [
            "Less mature agent stack than Claude Code or Codex",
            "Project Mariner (browser agent) still research-preview",
            "Pro tier paid-only since 2026-04-01",
            "Workspace AI integration depth varies by SKU"
          ],
          "prompt_style": {
            "structure": "Use system_instruction for persona/rules. Few-shot is heavily rewarded — provide 2-3 examples of the desired output. Multimodal inputs go inline (image, video, audio, PDF).",
            "voice": "Direct, audience-aware. Specify desired length and format. The model responds well to chained sub-prompts that build context.",
            "advanced": [
              "Use grounding (tools: [{google_search: {}}]) for fresh / time-sensitive facts",
              "Drop video, audio, or large PDFs directly into the prompt — no preprocessing needed",
              "Deep Think mode for hardest problems (math, abstract reasoning)",
              "Context caching dramatically reduces cost on long stable prefixes (50K+ tokens)"
            ]
          },
          "optimal_keywords": [
            "Think carefully",
            "Show your reasoning",
            "Format as",
            "Use markdown headings",
            "system_instruction:",
            "tools: [google_search]",
            "Grounding:",
            "Audience:",
            "Cite sources from the document",
            "At timestamp [X:XX]"
          ],
          "patterns": [
            {
              "name": "Multimodal in-prompt",
              "example": "[video clip attached]\nWhat happens at 0:42? Cite the timestamp."
            },
            {
              "name": "Search grounding",
              "example": "tools: [{google_search: {}}]\nWhat changed in the iPhone release this week? Cite sources."
            },
            {
              "name": "Deep Think for hard reasoning",
              "example": "Use Deep Think mode.\nProve that [theorem]. Show every step."
            }
          ],
          "anti_patterns": [
            "Stripping multimodal inputs and pasting transcripts instead (loses huge signal)",
            "Not using context caching on long stable prompts",
            "Reaching for a different vendor for image / video when Imagen + Veo are right there",
            "Skipping few-shot examples — Gemini rewards them more than peers"
          ],
          "specific_tips": [
            "Use AI Studio for prototyping — most generous free tier",
            "Drop video / audio / PDFs in directly; don't transcribe first",
            "Deep Think for math, ARC-AGI-style reasoning, hard proofs",
            "Imagen 4 Ultra has the best in-image text rendering",
            "Veo 3 for video — only model with truly synchronized native audio",
            "Gemma 4 for open-weight self-host with US provenance",
            "context_caching for any 50K+ stable prefix saves 75% on input",
            "Few-shot examples lift Gemini quality more than most peers"
          ]
        },
        "plain": {
          "what_it_is": "Google's Gemini is a family of models built to work with text, images, audio, and video all at once. The big draw: it can hold the equivalent of a small book (over a million words) in its head at the same time, which is way more than most.",
          "training": "Google trained Gemini on a huge mix of text, images, audio, and video together — so it learned them as one connected thing rather than separate skills. Like teaching a kid to read, look, and listen at the same time.",
          "strengths": [
            "Reading enormous documents — entire codebases or long videos — without losing the thread",
            "Understanding images and video deeply, not just identifying them",
            "Pulling answers from a long source document accurately",
            "Cheap-per-word when you're feeding it a lot",
            "Tight integration with Google Search, Docs, Drive, and YouTube"
          ],
          "weaknesses": [
            "Writing voice can feel a bit corporate and bland",
            "Tool-calling and agent behavior less polished than OpenAI or Claude",
            "Sometimes inconsistent — same question can get different-quality answers",
            "Safety filters can be overzealous on innocent requests"
          ],
          "prompt_style": {
            "structure": "Bullets and numbered lists work well. State the goal up top.",
            "voice": "Plain and specific. Tell it the format you want and what to leave out.",
            "advanced": [
              "Put very long documents in first, then your question — Gemini handles huge context",
              "Turn on 'Deep Think' or thinking mode for math and reasoning",
              "Ground answers in your own document, search results, or YouTube transcript URLs",
              "Use structured output mode for clean JSON"
            ]
          },
          "optimal_keywords": [
            "based on the document above",
            "cite the section",
            "summarize per chapter",
            "compare and contrast",
            "format as table",
            "step by step",
            "if not stated, say so"
          ],
          "patterns": [
            {
              "name": "Long document Q&A",
              "example": "[paste 100-page PDF as text]\n\nUsing only the document above, what are the three main arguments and which page does each appear on?"
            },
            {
              "name": "Video transcript analysis",
              "example": "Here's a YouTube transcript:\n[transcript]\n\nList the speaker's main claims and timestamp each one."
            },
            {
              "name": "Compare multiple sources",
              "example": "Document A: [doc1]\nDocument B: [doc2]\n\nWhere do they agree? Where do they disagree?"
            }
          ],
          "anti_patterns": [
            "Asking it to be creative without examples — output gets generic",
            "Hoping it remembers context from a previous chat — start fresh each task",
            "Vague requests like 'make this better' — say what better means"
          ],
          "specific_tips": [
            "Gemini shines when you give it a lot to read — don't be shy with the context",
            "If filters block a normal question, rephrase rather than fight them",
            "Use Google AI Studio (free) to experiment before paying for API",
            "Pro for hard stuff, Flash for fast/cheap stuff"
          ]
        }
      }
    },
    "xai": {
      "display_name": "xAI",
      "product_line": "Grok / Imagine",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://docs.x.ai/docs/release-notes",
      "docs_url": "https://docs.x.ai",
      "models": [
        {
          "id": "grok-4.3",
          "name": "Grok 4.3",
          "tier": "flagship",
          "released": "2026-05-04",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision",
            "video"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 3.0,
            "output_per_m_usd": 15.0
          },
          "best_for": "Cost-efficient flagship — built-in reasoning, 1M context, native video input.",
          "source_url": "https://docs.x.ai/developers/release-notes",
          "source_date": "2026-05-04"
        },
        {
          "id": "grok-build-0.1",
          "name": "Grok Build 0.1",
          "tier": "coding",
          "released": "2026-06-01",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "public-beta",
          "pricing": null,
          "best_for": "Purpose-built coding model for agentic workflows. xAI API public beta since 2026-06-01.",
          "source_url": "https://x.ai/news",
          "source_date": "2026-06-01"
        },
        {
          "id": "grok-v9-medium",
          "name": "Grok V9-Medium (expected)",
          "tier": "upcoming",
          "released": "2026-06-15",
          "context": null,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "expected",
          "pricing": null,
          "best_for": "1.5T-parameter foundation model — Musk announced training complete 2026-05-25, public release expected mid-June 2026.",
          "source_url": "https://x.ai/news",
          "source_date": "2026-05-25"
        },
        {
          "id": "grok-4.20",
          "name": "Grok 4.20",
          "tier": "mid",
          "released": "2026-04-20",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 1.0,
            "output_per_m_usd": 5.0
          },
          "best_for": "Mid-tier Grok — good price/quality.",
          "source_url": "https://docs.x.ai/developers/release-notes",
          "source_date": "2026-04-20"
        },
        {
          "id": "grok-4.1-fast",
          "name": "Grok 4.1 Fast",
          "tier": "fast",
          "released": "2026-03-15",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": {
            "input_per_m_usd": 0.2,
            "output_per_m_usd": 0.8
          },
          "best_for": "Cheap/fast Grok.",
          "source_url": "https://docs.x.ai/developers/release-notes",
          "source_date": "2026-03-15"
        },
        {
          "id": "grok-imagine-video",
          "name": "Grok Imagine Video",
          "tier": "video",
          "released": "2026-04-01",
          "context": null,
          "max_output": null,
          "modalities": [
            "video"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Video generation in the Grok app.",
          "source_url": "https://x.ai/news",
          "source_date": "2026-04-01"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "xAI's Grok family — Grok 4.3 (flagship, $1.25/$2.50, 1M context, always-on reasoning, native video input), Grok 4.20 (2M context fast frontier), Grok 4.1 Fast ($0.20/$0.50 with 2M context), Grok Imagine (image + video with sync audio). First-party access to live X data.",
          "training": "Pretrained on a web corpus plus real-time X data. Grok 4.3 has always-on reasoning — there is no effort dial; reasoning runs by default. SuperGrok Heavy enables multi-agent reasoning at the consumer tier. Distinctive personality tuning produces direct, opinionated responses.",
          "strengths": [
            "Cheapest hosted frontier ($1.25 / $2.50 list)",
            "Only model with first-party live X access (Live Search)",
            "Native video input on 4.3",
            "Always-on reasoning — no effort dial to manage",
            "Largest context option (Grok 4.20 at 2M)",
            "Grok 4.1 Fast — best raw bargain anywhere ($0.20 / $0.50 with 2M context)",
            "Distinctive direct personality (less hedging)"
          ],
          "weaknesses": [
            "No first-party coding agent product",
            "No public Computer Use API",
            "No public realtime voice API for product builds",
            "Smaller enterprise / cloud-marketplace presence",
            "Thinner safety/compliance documentation"
          ],
          "prompt_style": {
            "structure": "Standard OpenAI-compatible messages. No effort dial to set — reasoning is always on. For X-data queries, enable Live Search via the API parameter.",
            "voice": "Direct and opinionated prompts work better than soft hedges. The model is trained to respond crisply to crisp asks. Ask for an opinion — it'll give one.",
            "advanced": [
              "Enable Live Search for any 'what's happening now' query — it can grep X in real time",
              "SuperGrok Heavy unlocks multi-agent reasoning at $300/mo consumer tier",
              "Native video input on 4.3 — no transcript pre-step needed"
            ]
          },
          "optimal_keywords": [
            "What are people saying on X about",
            "Live Search:",
            "Be direct — don't hedge",
            "Pick a side and defend it",
            "What's your honest take",
            "Real-time:",
            "Cite the X posts"
          ],
          "patterns": [
            {
              "name": "Live Search for real-time X",
              "example": "Live Search: enabled.\nWhat were the top 5 most-discussed AI launches on X this week? Cite the posts."
            },
            {
              "name": "Direct-opinion ask",
              "example": "Don't hedge. Give me your actual recommendation: should I X or Y? You can be wrong."
            },
            {
              "name": "Native video input",
              "example": "[video attached]\nDescribe what happens, then critique the on-screen UI."
            }
          ],
          "anti_patterns": [
            "Asking for hedged 'on the other hand' answers — wastes Grok's strength",
            "Using a separate transcription step for video on 4.3 — it takes video natively",
            "Skipping Live Search for X-discourse questions"
          ],
          "specific_tips": [
            "Grok 4.3 for real-time X intelligence — no competitor can match",
            "Grok 4.1 Fast at $0.20/$0.50 with 2M context for cheap high-volume work",
            "Ask directly — Grok responds well to opinionated prompts",
            "Enable Live Search whenever the answer requires 'now' data",
            "SuperGrok Heavy ($300/mo) for multi-agent reasoning if you're a consumer",
            "Native video input on 4.3 — drop video in directly",
            "Grok Imagine Video at $0.05/sec with audio is the cheapest cinematic option"
          ]
        },
        "plain": {
          "what_it_is": "Grok is Elon Musk's xAI assistant. It's the chatty, less filtered cousin in the AI family — happy to crack a joke, plugged into X (formerly Twitter) for real-time news, and willing to engage with hotter topics than its rivals.",
          "training": "Grok was trained on a big slice of the public internet plus a real-time feed from X. xAI fine-tunes it to be direct and a little playful. Newer versions also pause to reason through hard problems.",
          "strengths": [
            "Knows what just happened — pulls live info from X",
            "Speaks plainly, with personality and humor",
            "Won't dodge politically charged or edgy topics the way some rivals do",
            "Decent at math and coding for its size",
            "Image understanding is solid"
          ],
          "weaknesses": [
            "Not as polished as Claude or GPT for long, careful writing",
            "Smaller developer ecosystem — fewer tutorials and integrations",
            "Less reliable for complex agent workflows",
            "Personality can be hit or miss for serious work"
          ],
          "prompt_style": {
            "structure": "Casual works fine. Bullets and short paragraphs help.",
            "voice": "Be direct. Grok matches your energy — punchy in, punchy out.",
            "advanced": [
              "Ask for sources from X when you want to verify a current event",
              "Use the 'Think' or 'DeepSearch' mode on hard problems",
              "Tell it to skip the small talk if you want quick answers",
              "Ask follow-up questions in the same chat — context carries"
            ]
          },
          "optimal_keywords": [
            "latest",
            "as of today",
            "check X for",
            "sources?",
            "skip the disclaimers",
            "just the answer",
            "verify",
            "what are people saying about"
          ],
          "patterns": [
            {
              "name": "Real-time news lookup",
              "example": "What's happening with [topic] today? Check X and link the original posts."
            },
            {
              "name": "No-fluff mode",
              "example": "Direct answer only, no caveats, no 'as an AI' disclaimers. Question: ..."
            },
            {
              "name": "Use Think mode for hard math",
              "example": "Use Think mode.\nProblem: solve for x where..."
            }
          ],
          "anti_patterns": [
            "Expecting Claude-level polish on long-form essays",
            "Using it for content that has to be safe-for-corporate without setting that tone",
            "Trusting any single real-time claim without asking it to source"
          ],
          "specific_tips": [
            "Grok is your tool for 'what's happening right now'",
            "Set the tone at the start — formal, casual, or sarcastic — and it'll match",
            "Available bundled with X Premium, which is cheap if you already pay",
            "Cross-check anything political across other sources"
          ]
        }
      }
    },
    "deepseek": {
      "display_name": "DeepSeek",
      "product_line": "DeepSeek V4",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://api-docs.deepseek.com/news",
      "docs_url": "https://api-docs.deepseek.com",
      "models": [
        {
          "id": "deepseek-v4-pro",
          "name": "DeepSeek V4 Pro",
          "tier": "flagship",
          "released": "2026-04-24",
          "context": 1000000,
          "max_output": 384000,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "current",
          "license": "MIT",
          "params_total_b": 1600,
          "params_active_b": 49,
          "architecture": "MoE",
          "pricing": {
            "input_per_m_usd": 0.435,
            "output_per_m_usd": 0.87,
            "list_input_per_m_usd": 1.74,
            "list_output_per_m_usd": 3.48,
            "cache_hit_input_per_m_usd": 0.0036,
            "discount_active": true,
            "discount_expires": "2026-05-31T15:59:00Z",
            "note": "75% discount through 2026-05-31"
          },
          "best_for": "Open-source SOTA on agentic-coding benchmarks. Most capable open-weights frontier model.",
          "source_url": "https://api-docs.deepseek.com/news/news260424",
          "source_date": "2026-04-24"
        },
        {
          "id": "deepseek-v4-flash",
          "name": "DeepSeek V4 Flash",
          "tier": "cheap",
          "released": "2026-04-24",
          "context": 1000000,
          "max_output": 384000,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "current",
          "license": "MIT",
          "params_total_b": 284,
          "params_active_b": 13,
          "architecture": "MoE",
          "pricing": {
            "input_per_m_usd": 0.14,
            "output_per_m_usd": 0.28,
            "cache_hit_input_per_m_usd": 0.0028
          },
          "best_for": "Reasoning closely approaches V4 Pro. Cheapest competent tier.",
          "source_url": "https://api-docs.deepseek.com/news/news260424",
          "source_date": "2026-04-24"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "DeepSeek V4 family — V4 Pro (flagship, 1.6T / 49B-active MoE, 1M context, 384K max output, MIT-licensed open weights), V4 Flash (284B / 13B-active, $0.14 / $0.28). Open-source SOTA on agentic-coding benchmarks. Discounted Pro pricing through 2026-05-31.",
          "training": "Massive Mixture-of-Experts pretraining (~3% of weights active per token), then RLHF + agentic-coding-specific training. Supports a thinking mode toggle per request (thinking.type='enabled'). Long-context tuning gives strong recall across the full 1M window.",
          "strengths": [
            "Open-source SOTA on agentic-coding benchmarks",
            "Most capable open-weights frontier model (MIT license)",
            "Largest output cap (384K tokens) in the field",
            "Cache-hit input ~1/120 of cache-miss — best prefix-cache economics anywhere",
            "Both OpenAI- AND Anthropic-compatible API endpoints",
            "1M context across V4 Pro and V4 Flash"
          ],
          "weaknesses": [
            "China-based provenance — procurement / data-flow review needed for some buyers",
            "Public benchmark transparency thinner than US labs",
            "No native image / video / music generation",
            "deepseek-chat and deepseek-reasoner retire 2026-07-24"
          ],
          "prompt_style": {
            "structure": "Standard OpenAI-compatible messages. For hard reasoning, set thinking.type='enabled' in extra_body. For agentic tasks, frame as plan-execute-verify with explicit tool schemas.",
            "voice": "Direct, structured prompts win. Explicit goals, constraints, and output formats. Chain-of-thought is mostly redundant if thinking mode is on — let the model think internally.",
            "advanced": [
              "Toggle thinking mode per request (thinking.type='enabled')",
              "Use prefix caching aggressively — cache hits are ~1/120 the price of cache misses",
              "Combine thinking mode + tool calls for SOTA agentic coding behavior",
              "OpenAI- and Anthropic-compatible — pick the SDK that matches your existing code"
            ]
          },
          "optimal_keywords": [
            "thinking.type='enabled'",
            "Plan, execute, verify",
            "Function calling:",
            "Use tools: read_file, write_file, run_tests",
            "Show your reasoning before the answer",
            "Step 1: ... Step 2: ..."
          ],
          "patterns": [
            {
              "name": "Thinking-mode toggle",
              "example": "extra_body: { thinking: { type: 'enabled' } }\n// For math, multi-step planning, hard reasoning"
            },
            {
              "name": "Plan → execute → verify (agentic coding)",
              "example": "1. PLAN — read relevant files, list ordered changes, name risks\n2. EXECUTE — make changes one file at a time, run tests\n3. VERIFY — re-read diff, run full suite"
            },
            {
              "name": "Prefix caching",
              "example": "// Put stable system prompt + reference doc FIRST\n// Cache-hit input = $0.0036/M (1/120 of cache-miss)"
            }
          ],
          "anti_patterns": [
            "Not using prefix caching when system prompt + ref docs are stable",
            "Asking for explicit chain-of-thought when thinking mode is on (redundant)",
            "Leading the prompt with timestamps or per-request data (breaks cache prefix)",
            "Sending sensitive data to the PRC-hosted API without reviewing terms"
          ],
          "specific_tips": [
            "Use the discounted V4 Pro price window (through 2026-05-31)",
            "Put stable system + reference docs FIRST in the prompt for prefix caching",
            "Toggle thinking mode for math / hard reasoning; leave it off for classification / format tasks",
            "V4 Flash is on par with V4 Pro for simple agent tasks at 1/3 the cost",
            "Self-host the MIT weights for compliance / on-prem (Hugging Face)",
            "Use Anthropic-compat endpoint if your code targets Claude already",
            "Combine thinking + tool calls for the best agentic-coding configuration"
          ]
        },
        "plain": {
          "what_it_is": "DeepSeek is a Chinese AI lab making powerful open models that punch way above their price. V4 Pro is their flagship — a clever design where the model only 'turns on' the parts it needs for each question, so it's fast and cheap to run.",
          "training": "DeepSeek trained on a huge mix of text and code, then used a special technique to teach it to reason — by rewarding it whenever it worked out the right answer. The reasoning skill from the big model gets passed down to smaller ones.",
          "strengths": [
            "Cheap to run — pennies on the dollar compared to GPT or Claude",
            "Solid at math, coding, and step-by-step logic",
            "Open weights — you can run it on your own servers",
            "Strong reasoning when you let it think out loud",
            "Excellent Chinese-language support"
          ],
          "weaknesses": [
            "English writing voice less polished than Claude or GPT",
            "Avoids politically sensitive topics involving China",
            "Smaller community of third-party tools and tutorials",
            "Reasoning mode can be slow to answer"
          ],
          "prompt_style": {
            "structure": "Markdown with clear sections. State the goal first.",
            "voice": "Be explicit. Say 'show your reasoning step by step' to unlock its best work.",
            "advanced": [
              "Always turn on reasoning mode for hard problems — it's where DeepSeek shines",
              "Use English for technical work, Chinese for Chinese-language tasks",
              "For code: paste the broken code, the error, and what you expected",
              "If running locally, batch your requests to save GPU time"
            ]
          },
          "optimal_keywords": [
            "step by step",
            "show your reasoning",
            "verify the math",
            "as JSON",
            "in Python",
            "edge cases",
            "trace through the code"
          ],
          "patterns": [
            {
              "name": "Hard math with reasoning on",
              "example": "Reasoning mode ON.\nSolve: [problem]\nShow each step and double-check the arithmetic."
            },
            {
              "name": "Code debug",
              "example": "Here's the function: [code]\nIt throws this error: [error]\nIt should return [expected]. Find the bug, explain it, then fix it."
            },
            {
              "name": "Bilingual task",
              "example": "Translate this English contract to Chinese, then summarize the key obligations in both languages."
            }
          ],
          "anti_patterns": [
            "Asking about Tiananmen, Taiwan independence, etc. — you'll get a deflection",
            "Skipping reasoning mode on a problem that needs it",
            "Expecting Claude-quality marketing copy"
          ],
          "specific_tips": [
            "DeepSeek API is shockingly cheap — great for high-volume jobs",
            "The open-weights version is one of the only top models you can self-host",
            "For Chinese work, this is one of the best models available",
            "Pair it with a Western model for any topic that touches China-sensitive issues"
          ]
        }
      }
    },
    "alibaba": {
      "display_name": "Alibaba",
      "product_line": "Qwen / Wan / HappyHorse",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://modelstudio.console.alibabacloud.com",
      "docs_url": "https://huggingface.co/Qwen",
      "models": [
        {
          "id": "qwen3.7-max",
          "name": "Qwen 3.7 Max",
          "tier": "flagship",
          "released": "2026-05-20",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Agent-first flagship — beats Claude Opus 4.6 Max on Terminal-Bench 2.0, SWE-Bench Pro, MCP-Atlas. 56.6 Artificial Analysis Intelligence Index. Closed weights.",
          "source_url": "https://www.scmp.com/tech/big-tech/article/3354212/alibaba-unveils-new-qwen-model-custom-chips-bid-become-chinas-ai-factory",
          "source_date": "2026-05-20"
        },
        {
          "id": "qwen3.7-plus",
          "name": "Qwen 3.7 Plus",
          "tier": "mid",
          "released": "2026-06-01",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision",
            "video"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Low-cost multimodal agent model. Understands images + video. ~1/6 the per-token price of 3.7 Max.",
          "source_url": "https://www.marktechpost.com/2026/06/02/alibabas-qwen-team-launches-qwen3-7-plus-adding-vision-deep-reasoning-tool-invocation-and-autonomous-iteration-on-the-bailian-platform/",
          "source_date": "2026-06-01"
        },
        {
          "id": "qwen3.6-max",
          "name": "Qwen 3.6 Max",
          "tier": "previous",
          "released": "2026-04-10",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": false,
          "status": "previous",
          "pricing": null,
          "best_for": "Previous flagship — superseded by 3.7 Max on 2026-05-20.",
          "source_url": "https://qwenlm.github.io",
          "source_date": "2026-04-10"
        },
        {
          "id": "qwen3.6-plus",
          "name": "Qwen 3.6 Plus",
          "tier": "previous",
          "released": "2026-04-10",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text",
            "vision"
          ],
          "open_weights": false,
          "status": "previous",
          "pricing": null,
          "best_for": "Previous Plus — superseded by 3.7 Plus on 2026-06-01.",
          "source_url": "https://qwenlm.github.io",
          "source_date": "2026-04-10"
        },
        {
          "id": "qwen3.5-omni",
          "name": "Qwen 3.5 Omni",
          "tier": "open",
          "released": "2026-02-28",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text",
            "vision",
            "audio"
          ],
          "open_weights": true,
          "status": "current",
          "pricing": null,
          "best_for": "Open-weights omni-modal flagship.",
          "source_url": "https://qwenlm.github.io",
          "source_date": "2026-02-28"
        },
        {
          "id": "wan-2.7",
          "name": "Wan 2.7",
          "tier": "video",
          "released": "2026-04-15",
          "context": null,
          "max_output": null,
          "modalities": [
            "video"
          ],
          "open_weights": true,
          "status": "current",
          "pricing": null,
          "best_for": "Open-weights video generation.",
          "source_url": "https://qwenlm.github.io",
          "source_date": "2026-04-15"
        },
        {
          "id": "happyhorse-1",
          "name": "HappyHorse 1.0",
          "tier": "video",
          "released": "2026-03-20",
          "context": null,
          "max_output": null,
          "modalities": [
            "video"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Cinematic video model.",
          "source_url": "https://qwenlm.github.io",
          "source_date": "2026-03-20"
        },
        {
          "id": "qwen-image",
          "name": "Qwen Image 2512",
          "tier": "image",
          "released": "2025-12-15",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": true,
          "status": "current",
          "pricing": null,
          "best_for": "Open-weights image generation.",
          "source_url": "https://qwenlm.github.io",
          "source_date": "2025-12-15"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Alibaba's lineup — Qwen 3.6 Max (proprietary flagship, 1M+ context, agentic-app-dev tuned), Qwen 3.6 Plus (cost-efficient), Qwen 3.5 Omni (open multimodal), Wan 2.7 (text-to-video), HappyHorse 1.0 (image-to-video SOTA), Qwen Image (text-to-image). Broadest open-weights family across modalities.",
          "training": "Qwen 3.5 series shipped 60% cheaper / 8× faster than prior gen. Qwen 3.6 Max is explicitly tuned for autonomous agent workflows — app development and visual browsing are the named flagship use cases. Open Qwen variants live on Hugging Face (text, multimodal Omni, image gen).",
          "strengths": [
            "Broadest open-weights family across modalities (text + multimodal + image gen + audio)",
            "Qwen 3.6 Max explicitly tuned for agentic app-dev workflows",
            "1M+ token context with visual reasoning",
            "HappyHorse 1.0 is top-ranked image-to-video",
            "Multi-region cloud (China / Singapore / international)",
            "OpenAI-compatible Model Studio API"
          ],
          "weaknesses": [
            "Qwen 3.6 Max is proprietary, not open",
            "Public benchmark transparency thinner than US labs",
            "China-based provenance — procurement / data-flow concerns for some Western buyers",
            "No first-party Computer Use API, no realtime voice API"
          ],
          "prompt_style": {
            "structure": "Frame tasks as autonomous agent loops — Qwen 3.6 Max's tuning shines when there are tools to call and steps to coordinate. Plan-execute-verify wins.",
            "voice": "Direct, structured prompts with explicit goals and constraints. Use the agentic frame even for tasks that aren't obviously agentic — quality lifts measurably.",
            "advanced": [
              "Submit screenshots + text in the same prompt — visual reasoning is a flagship capability",
              "Use 1M+ context for whole-codebase reads",
              "Pair Qwen 3.6 Max as orchestrator with Qwen Image → HappyHorse for media pipelines"
            ]
          },
          "optimal_keywords": [
            "You are an autonomous agent",
            "Plan, execute, verify",
            "Tools: read_file, write_file, run_tests, search_repo",
            "Visual browsing:",
            "Process: PLAN → EXECUTE → VERIFY",
            "[screenshot attached]"
          ],
          "patterns": [
            {
              "name": "Plan-execute-verify for agentic app-dev",
              "example": "You are running an autonomous app-dev agent. Tools: read_file, write_file, run_tests.\nPLAN — read files, list changes\nEXECUTE — one file at a time, test after each\nVERIFY — re-read diff, full test suite"
            },
            {
              "name": "Visual browsing",
              "example": "[screenshots of dashboard attached]\nWalk through the visual workflow. What's the primary action? Where's the friction?"
            },
            {
              "name": "Modality handoff",
              "example": "Qwen 3.6 Max plans the campaign → Qwen Image generates 3 stills → HappyHorse animates each → Wan 2.7 generates bridge clips"
            }
          ],
          "anti_patterns": [
            "Using Qwen 3.6 Max for simple chat/classification — overkill, route to Qwen 3.6 Plus instead",
            "Skipping the agent-loop frame when it would help (quality drops)",
            "Not using visual inputs when the task is visual"
          ],
          "specific_tips": [
            "Qwen 3.6 Max for agentic / app-dev / visual-browsing — these are the tuned flagship uses",
            "Qwen 3.6 Plus for high-volume cost-efficient text work",
            "Qwen 3.5 Omni for open-weights text + audio + image + video",
            "HappyHorse 1.0 for image-to-video (top-ranked in this category)",
            "Wan 2.7 for text-to-video",
            "Use Singapore-international region for non-PRC data flow",
            "Frame even non-agentic tasks with PLAN/EXECUTE/VERIFY structure — Qwen Max rewards it"
          ]
        },
        "plain": {
          "what_it_is": "Alibaba's Qwen is a giant family of open-source AI models — over 100 of them, in every shape and size, all freely downloadable. They're top of the global open-source leaderboards and built especially well for both Chinese and English.",
          "training": "Alibaba trained Qwen on a massive mix of text from many languages, then made specialized versions — one for coding, one for math, one for vision, and so on. Many use a 'turn on only what you need' design to stay efficient.",
          "strengths": [
            "Truly bilingual — equally good at English and Chinese",
            "Open weights — anyone can run them on their own machine",
            "Specialized versions for coding, math, vision, video, embeddings",
            "Strong scores on benchmarks for their size",
            "Big community making improvements and fine-tunes"
          ],
          "weaknesses": [
            "Like other Chinese models, avoids politically sensitive topics",
            "Voice in English can feel slightly translated",
            "So many versions it's hard to know which to pick",
            "Cloud API less polished than Western alternatives"
          ],
          "prompt_style": {
            "structure": "Markdown headers and bullets. Be clear about whether you want Chinese or English output.",
            "voice": "Direct and specific. Show examples for tricky formatting.",
            "advanced": [
              "Pick the right specialist: Qwen-Coder for code, Qwen-VL for images, Qwen-Math for proofs",
              "Turn on 'thinking mode' for hard reasoning",
              "Use the 'Instruct' versions for chatting, 'Base' for fine-tuning",
              "For video understanding, use Qwen2.5-VL or newer"
            ]
          },
          "optimal_keywords": [
            "请用中文回答",
            "answer in English",
            "step by step",
            "as JSON",
            "concise",
            "code only",
            "explain your logic"
          ],
          "patterns": [
            {
              "name": "Pick the specialist",
              "example": "Task: write a Python function. Use Qwen-Coder.\nProblem: ..."
            },
            {
              "name": "Bilingual technical doc",
              "example": "Translate this technical spec to Chinese and keep all variable names in English."
            },
            {
              "name": "Image + question",
              "example": "[Image attached]\nDescribe what's happening in this photo and identify any text visible."
            }
          ],
          "anti_patterns": [
            "Using the wrong specialist version for the task",
            "Asking about China-sensitive politics",
            "Mixing languages without telling it which you want back"
          ],
          "specific_tips": [
            "If you need a free, capable model to run yourself, Qwen is usually the best bet",
            "Newer versions almost always beat older ones — check the date",
            "The 'Plus' and 'Max' API tiers via Alibaba Cloud are great value",
            "Read the model card for each specialist — capabilities vary a lot"
          ]
        }
      }
    },
    "moonshot": {
      "display_name": "Moonshot AI",
      "product_line": "Kimi",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://platform.moonshot.ai",
      "docs_url": "https://platform.moonshot.ai",
      "models": [
        {
          "id": "kimi-k2.7-code",
          "name": "Kimi K2.7-Code",
          "tier": "coding",
          "released": "2026-06-12",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "current",
          "pricing": null,
          "best_for": "1T-parameter MoE (32B active, 384 experts) for agentic coding. Modified MIT license. 30% fewer reasoning tokens vs K2.6, +21.8% on Kimi Code Bench v2.",
          "source_url": "https://cryptobriefing.com/kimi-k2-7-code-open-source-release/",
          "source_date": "2026-06-12"
        },
        {
          "id": "kimi-k2.7-code-highspeed",
          "name": "Kimi K2.7-Code HighSpeed",
          "tier": "coding-fast",
          "released": "2026-06-15",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": false,
          "status": "beta",
          "pricing": null,
          "best_for": "HighSpeed K2.7-Code variant rolling out to Kimi Code Beta. ~180 tok/s median, up to ~260 tok/s short-context — roughly 6× the standard release.",
          "source_url": "https://www.techtimes.com/articles/318414/20260615/kimi-k27-code-adds-highspeed-mode-skips-independent-benchmark-submission.htm",
          "source_date": "2026-06-15"
        },
        {
          "id": "kimi-work",
          "name": "Kimi Work (desktop)",
          "tier": "agent",
          "released": "2026-06-09",
          "context": null,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Desktop application coordinating up to 300 specialized sub-agents on the local machine. No cloud required.",
          "source_url": "https://cryptobriefing.com/moonshot-ai-kimi-work-300-agents-desktop/",
          "source_date": "2026-06-09"
        },
        {
          "id": "kimi-k2.6",
          "name": "Kimi K2.6",
          "tier": "previous",
          "released": "2026-04-08",
          "context": 256000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "previous",
          "pricing": null,
          "best_for": "Previous flagship — superseded by K2.7-Code on 2026-06-12. Still the engine behind Kimi Work.",
          "source_url": "https://www.moonshot.cn/",
          "source_date": "2026-04-08"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Moonshot AI's Kimi K2.6 — 1 trillion total / 32B active MoE, Modified MIT open weights. 262K context, 16K max output. Headline feature: Agent Swarm coordinates up to 300 sub-agents across 4,000 steps per run. $0.60 / $2.50 on Moonshot's own API.",
          "training": "Massive MoE pretraining (~3% weights active per token). Explicitly tuned for long-horizon agent orchestration — Agent Swarm enables decomposing huge tasks across many sub-agents with structured handoff. Multimodal text + image + video in a single architecture.",
          "strengths": [
            "1T-MoE / 32B-active — most capable single open-weights agent flagship",
            "Agent Swarm: 300 sub-agents × 4,000 steps per run (no peer markets this fan-out)",
            "Multimodal in one architecture (text + image + video)",
            "Modified MIT open license",
            "Lowest direct-API rate for 1T-class ($0.60 / $2.50)"
          ],
          "weaknesses": [
            "Output cap of 16K tokens — modest for long-form generation",
            "Generalist text capability less benchmarked than DeepSeek V4 Pro / GLM-5.1",
            "Heavy hardware footprint at full precision for self-host",
            "No first-party multimodal media generation (image / video gen)"
          ],
          "prompt_style": {
            "structure": "Frame work as an orchestrator + sub-agents. Decompose tasks into clearly-scoped sub-jobs with named roles, tools, and deliverables. Use structured output for sub-agent results.",
            "voice": "Direct, structured. Show the decomposition step. Name each sub-agent's role and scope explicitly.",
            "advanced": [
              "Use the Agent Swarm pattern (plan → delegate to sub-agents → assemble) for any task that genuinely fans out",
              "Cap sub-agent count to what the work actually needs (5-30 typical, 300 is a ceiling not a goal)",
              "Multimodal inputs welcome — drop screenshots inline for UI tasks"
            ]
          },
          "optimal_keywords": [
            "You are an orchestrator running in an Agent Swarm loop",
            "Sub-agent roles:",
            "Tools: read_file, write_file, run_tests, search_repo",
            "Decompose into N sub-tasks",
            "Plan → Delegate → Assemble",
            "Emit reasoning before each tool call"
          ],
          "patterns": [
            {
              "name": "Agent Swarm orchestration",
              "example": "PLAN — decompose into sub-tasks; name dependencies\nDELEGATE — for each sub-task: role, tools, deliverable\nEXECUTE — run sub-agents (5-30 typical, cap based on work)\nASSEMBLE — merge structured results"
            },
            {
              "name": "Multi-agent code review",
              "example": "Spawn 3 reviewers in parallel:\n- security_reviewer: injection, authz, secrets\n- perf_reviewer: N+1, allocation, blocking I/O\n- correctness_reviewer: off-by-ones, error handling"
            },
            {
              "name": "Visual app generation",
              "example": "[wireframe screenshot attached]\nGenerate a React app matching this. Use Agent Swarm: design, structure, components, tests, polish."
            }
          ],
          "anti_patterns": [
            "Spinning up 300 sub-agents for tasks that don't fan out (noise + cost)",
            "Plain chat prompts that don't leverage agentic tuning (drop to a cheaper model)",
            "Ignoring the 16K output cap — chain calls for longer generation"
          ],
          "specific_tips": [
            "Use Agent Swarm only when tasks genuinely fan out — single edits don't need it",
            "Cap sub-agent count at the work — 5-30 typical, 300 is the ceiling",
            "Run via Moonshot Platform API ($0.60 / $2.50) for the lowest rate",
            "Self-host MIT weights for compliance, but plan for heavy multi-GPU compute",
            "For long generation, chain multiple 16K-output calls",
            "Drop screenshots in for any UI / visual task — multimodal in one model"
          ]
        },
        "plain": {
          "what_it_is": "Kimi is the AI assistant from Moonshot, a Chinese startup. Its claim to fame: it can read truly enormous amounts of text in one go — entire books or hundreds of pages — and answer questions about all of it together.",
          "training": "Moonshot trained Kimi with a special focus on handling very long inputs without losing track. Newer versions also use a 'turn on what you need' design to keep it fast despite the huge memory.",
          "strengths": [
            "Reading and summarizing huge documents in one shot",
            "Holding the thread across long conversations",
            "Web search built in for current info",
            "Strong Chinese, solid English",
            "Free tier is generous"
          ],
          "weaknesses": [
            "Less well-known outside China — fewer integrations",
            "Avoids politically sensitive topics",
            "English writing voice can feel slightly off",
            "Smaller developer ecosystem"
          ],
          "prompt_style": {
            "structure": "Drop in your big document, then ask. Bullets and headers help organize your question.",
            "voice": "Plain and direct. Tell it the format you want.",
            "advanced": [
              "Just paste the whole document — Kimi was built for this",
              "Use web search mode for current info",
              "Ask for citations to the section of the doc",
              "For multi-step tasks, number the steps"
            ]
          },
          "optimal_keywords": [
            "based on this document",
            "cite the page",
            "summarize each chapter",
            "key points",
            "what's missing",
            "compare to",
            "table format"
          ],
          "patterns": [
            {
              "name": "Massive document Q&A",
              "example": "[paste 200-page PDF as text]\n\nWhat are the five key recommendations and where does each appear?"
            },
            {
              "name": "Multi-doc compare",
              "example": "Doc 1: [text]\nDoc 2: [text]\n\nWhere do they agree and disagree on [topic]?"
            },
            {
              "name": "Current event check",
              "example": "Use web search.\nWhat's the latest on [topic]? Cite sources."
            }
          ],
          "anti_patterns": [
            "Trying to use it for politically sensitive China topics",
            "Asking for creative writing with no examples",
            "Expecting GPT-grade tool calling"
          ],
          "specific_tips": [
            "Kimi is one of the best free options for long-document analysis",
            "If you only need it occasionally, the free chat is plenty",
            "For developers, the API is competitively priced",
            "Newer models have a thinking mode — use it on hard problems"
          ]
        }
      }
    },
    "zai": {
      "display_name": "Z.AI / Zhipu",
      "product_line": "GLM",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://docs.z.ai",
      "docs_url": "https://docs.z.ai",
      "models": [
        {
          "id": "glm-5.2",
          "name": "GLM-5.2",
          "tier": "flagship",
          "released": "2026-06-13",
          "context": 1000000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "current",
          "pricing": null,
          "best_for": "Coding-first frontier model. 1M usable context. Compatible with Claude Code, Cline, OpenCode, Roo Code, Goose, Crush, OpenClaw, Kilo Code. MIT-licensed weights coming.",
          "source_url": "https://aitoolly.com/ai-news/article/2026-06-14-zhipu-ai-releases-glm-52-a-fully-open-source-frontier-model-featuring-a-1m-context-window",
          "source_date": "2026-06-13"
        },
        {
          "id": "glm-5.1",
          "name": "GLM-5.1",
          "tier": "previous",
          "released": "2026-04-08",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "previous",
          "pricing": null,
          "best_for": "Previous flagship — superseded by GLM-5.2 on 2026-06-13.",
          "source_url": "https://z.ai",
          "source_date": "2026-04-08"
        },
        {
          "id": "glm-5",
          "name": "GLM-5",
          "tier": "previous",
          "released": "2026-02-15",
          "context": 200000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "previous",
          "pricing": null,
          "best_for": "Earlier release; still available.",
          "source_url": "https://z.ai",
          "source_date": "2026-02-15"
        },
        {
          "id": "glm-4.5-air",
          "name": "GLM-4.5-Air",
          "tier": "cheap",
          "released": "2025-11-20",
          "context": 128000,
          "max_output": null,
          "modalities": [
            "text"
          ],
          "open_weights": true,
          "status": "current",
          "pricing": null,
          "best_for": "Lightweight tier for cheap inference.",
          "source_url": "https://z.ai",
          "source_date": "2025-11-20"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Z.AI / Zhipu's GLM-5.1 — 745B total / 44B-active MoE (256 experts, 8 active per token). MIT-licensed open weights. 200K context with DeepSeek Sparse Attention. From China's first publicly-traded AI lab.",
          "training": "Pretrained on 28.5T tokens. Architecture pairs MoE routing with DeepSeek Sparse Attention (DSA) for lossless long-text efficiency. Positioned for the 'vibe coding → agentic engineering' transition — strong on agentic tasks and reasoning. Cerebras hosts a wafer-scale variant for low-latency inference.",
          "strengths": [
            "745B / 44B-active MoE, MIT-licensed open weights",
            "DeepSeek Sparse Attention — efficient long-text without quality loss",
            "Public-company governance (clearer benchmarks / disclosures than typical Chinese labs)",
            "Cerebras-hosted variant runs faster on wafer-scale silicon",
            "Strong on agentic engineering workloads"
          ],
          "weaknesses": [
            "Smaller context (200K) than DeepSeek V4 Pro / Qwen 3.6 Max (1M+)",
            "Lower agent-tooling investment than Kimi or Qwen",
            "China-provenance carries same procurement caveats as DeepSeek / Alibaba",
            "No first-party multimodal media generation"
          ],
          "prompt_style": {
            "structure": "OpenAI-compatible messages. For agentic tasks, use PLAN-EXECUTE-VERIFY structure. For long context, fill the 200K window deliberately — DSA preserves quality across the full window.",
            "voice": "Direct, structured prompts. Name the role and goal explicitly. The model responds well to explicit constraints.",
            "advanced": [
              "Self-host MIT weights with vLLM or SGLang for production",
              "Use Cerebras-hosted variant for low-latency inference at scale",
              "Don't artificially chunk inputs — try the full 200K window first; DSA handles it"
            ]
          },
          "optimal_keywords": [
            "From vibe coding to agentic engineering",
            "Plan, execute, verify",
            "Tools:",
            "Audit the code",
            "Stabilize, test, operationalize",
            "Don't refactor unrelated code",
            "Show your reasoning"
          ],
          "patterns": [
            {
              "name": "Vibe-to-prod hardening",
              "example": "Take this vibe-coded prototype and harden for production:\n1. Audit — list top 8 risks\n2. Stabilize — fix what's broken\n3. Test — propose tests per risk\n4. Operationalize — monitoring, logging, rollback"
            },
            {
              "name": "Bilingual technical writing",
              "example": "Write this spec in BOTH English and 书面语 Chinese. Match section structure 1:1. Keep code identifiers identical."
            },
            {
              "name": "Plan-execute-verify",
              "example": "PLAN: read files, restate task, list ordered changes\nEXECUTE: walk through each step with diffs\nVERIFY: re-read changes, confirm plan, surface tests"
            }
          ],
          "anti_patterns": [
            "Chunking inputs reflexively — try the full 200K window first",
            "Sending sensitive data to PRC-hosted API without reviewing terms",
            "Skipping the agent-loop frame on tasks that would benefit"
          ],
          "specific_tips": [
            "Prototype on the open MIT weights — no surprise rate limits",
            "Use Cerebras-hosted variant for low-latency at scale",
            "Don't chunk reflexively — DSA preserves quality across full 200K",
            "For EN+ZH technical writing, GLM excels at bilingual structure",
            "Frame as 'vibe coding → agentic engineering' for code hardening tasks",
            "Public-company predictability beats hype-cycle volatility for stable buyers"
          ]
        },
        "plain": {
          "what_it_is": "GLM is a family of open models from Zhipu AI, a major Chinese lab. They make general assistants, coding specialists, and a strong vision-language model. The newest GLM-4 series is especially good at tool use and agent workflows.",
          "training": "Zhipu trained GLM on a big bilingual mix of text and code, then added reinforcement learning to make it better at following tool instructions and acting as an agent.",
          "strengths": [
            "Tool calling and agent workflows are a focus area",
            "Strong code generation (CodeGeeX variants)",
            "Solid vision-language model (CogVLM / GLM-4V)",
            "Open weights for most versions",
            "Good Chinese and English"
          ],
          "weaknesses": [
            "Less mainstream than Qwen or DeepSeek — fewer tutorials",
            "Avoids politically sensitive Chinese topics",
            "API tooling less mature than Western alternatives",
            "Some specialist versions feel incomplete"
          ],
          "prompt_style": {
            "structure": "Bullets, clear sections, explicit tool definitions when using agents.",
            "voice": "Specific and direct. For tool use, give it clean JSON schemas.",
            "advanced": [
              "Use GLM-4 for agent and tool-heavy workflows",
              "CodeGeeX for code completion in IDEs",
              "For images, GLM-4V or CogVLM",
              "The All Tools mode chains web search, code interpreter, and image gen"
            ]
          },
          "optimal_keywords": [
            "call the function",
            "tool schema:",
            "step by step",
            "as JSON",
            "execute and return",
            "agent loop",
            "verify"
          ],
          "patterns": [
            {
              "name": "Function/tool call",
              "example": "You have these tools:\n- search(query): returns results\n- calc(expression): returns number\n\nUse them to answer: ..."
            },
            {
              "name": "Agent workflow",
              "example": "Goal: book a flight from NYC to SF next Friday under $300.\nPlan your steps, call tools, report results."
            },
            {
              "name": "Vision task",
              "example": "[image]\nDescribe the chart, extract the values, and tell me the trend."
            }
          ],
          "anti_patterns": [
            "Skipping clear tool schemas — GLM works best when you spell out the JSON",
            "Politically sensitive China topics",
            "Mixing Chinese and English without telling it which to output"
          ],
          "specific_tips": [
            "If you're building an agent, GLM-4 is worth a look — tool use is its strong suit",
            "Zhipu's web platform is a quick way to try the latest model",
            "Open-weight versions can run on consumer GPUs at smaller sizes",
            "For multilingual agents, GLM handles English/Chinese switching well"
          ]
        }
      }
    },
    "bytedance": {
      "display_name": "ByteDance Seed",
      "product_line": "Seedream / Seedance",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://seed.bytedance.com/en",
      "docs_url": "https://seed.bytedance.com/en",
      "models": [
        {
          "id": "seedream-4.5",
          "name": "Seedream 4.5",
          "tier": "image",
          "released": "2026",
          "context": null,
          "max_output": null,
          "modalities": [
            "image_generation"
          ],
          "open_weights": false,
          "status": "current",
          "best_for": "4MP native output; multi-image editing; strong typography.",
          "source_url": "https://seed.bytedance.com/en",
          "source_date": "2026-03-01"
        },
        {
          "id": "seedance-2.0",
          "name": "Seedance 2.0",
          "tier": "video",
          "released": "2026-02-12",
          "context": null,
          "max_output": null,
          "modalities": [
            "video_generation",
            "audio"
          ],
          "open_weights": false,
          "status": "current",
          "best_for": "Unified audio-video. 9 image / 3 video / 3 audio refs per prompt. 4-15s multi-shot.",
          "source_url": "https://seed.bytedance.com/en",
          "source_date": "2026-02-12"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "ByteDance Seed's media duo — Seedream 4.5 (image gen, up to 4K, multi-image editing), Seedance 2.0 (unified audio-video, accepts 9 image + 3 video + 3 audio references per prompt, outputs 4-15s multi-shot with dual-channel audio). Distributed via Higgsfield, fal.ai, Runware, attap.ai.",
          "training": "Seedream 4.5 — Diffusion Transformer + VAE, tuned for typography and multi-image consistency. Seedance 2.0 — unified multimodal joint generation; accepts mixed-modality references and produces synchronized audio+video in a single pass. Trained on TikTok-grade short-form content corpus.",
          "strengths": [
            "Seedance 2.0 accepts up to 9 image / 3 video / 3 audio references per prompt — most flexible in video gen",
            "Dual-channel audio generated alongside video in one pass",
            "Seedream 4.5 — 4K native, strong typography, multi-image editing",
            "Higgsfield + fal.ai + Runware all carry the models — multiple access paths"
          ],
          "weaknesses": [
            "No first-party Western developer console",
            "No text / coding LLM in the lineup (media only)",
            "China-provenance + TikTok regulatory exposure complicate procurement",
            "Closed weights"
          ],
          "prompt_style": {
            "structure": "Strong reference inputs do the heavy lifting. Build a small library of reference assets per project. For Seedance, plan the shots: 4-15s, multi-shot with explicit per-shot direction.",
            "voice": "Concrete and specific. Describe motion (what moves, how fast, camera behavior), audio (ambient + foreground), and continuity (character / style locks).",
            "advanced": [
              "Lock the look at small size (1K), then upscale to 4K — iteration cost drops dramatically",
              "Audio is part of the prompt — describe it explicitly or get default audio",
              "Use targeted edits (Seedance modify) rather than regenerating from scratch when iterating",
              "Mix reference modalities — images for character, video for motion, audio for tone"
            ]
          },
          "optimal_keywords": [
            "Reference: [image]",
            "Style anchor:",
            "Camera: [locked / push / pan]",
            "Motion: [describe what moves]",
            "Audio: [ambient + foreground]",
            "Multi-shot:",
            "Preserve EXACTLY:",
            "Targeted change:"
          ],
          "patterns": [
            {
              "name": "Brand-consistent product imagery (Seedream)",
              "example": "Style anchors uploaded: [3 reference images]\nSubject: keep proportions/color/texture identical to product reference\nSetting: [describe]\nLighting: [describe]\nOutput: 4K, 16:9"
            },
            {
              "name": "Multi-shot character video (Seedance)",
              "example": "Character ref: [image]; Setting ref: [image]; Audio ref: [audio]\nShot 1 (0-4s): action, camera\nShot 2 (4-8s): different angle, same character\nShot 3 (8-12s): closing emotional beat\nLock character voice from audio ref"
            },
            {
              "name": "Targeted edit",
              "example": "Edit ONLY: [specific change, frames affected]\nPreserve EXACTLY: other characters, audio track, framing, color palette outside change area"
            }
          ],
          "anti_patterns": [
            "Iterating at 4K (slow, expensive) — lock the look at 1K first",
            "Not describing audio (you'll get default audio that may not match)",
            "Regenerating from scratch when a targeted edit would do",
            "Skipping reference inputs and relying on text alone"
          ],
          "specific_tips": [
            "Build a reference asset library per project — characters, style anchors, audio tones",
            "Iterate at 1K, commit at 4K — iteration cost drops 10×",
            "Describe audio explicitly even if it's 'ambient room tone, no music'",
            "Multi-shot consistency wins with strong character + setting references",
            "Use targeted edits for tweaks; regenerate only for fundamental changes",
            "fal.ai is the most convenient developer access; Higgsfield for creative UI"
          ]
        },
        "plain": {
          "what_it_is": "ByteDance (the company behind TikTok) makes Doubao, their consumer AI assistant, and the Seed family of research models. They're huge inside China for chat, voice, and video, and their video-generation models are state of the art.",
          "training": "ByteDance trained Doubao on a massive mix of Chinese and English text, voice, and video data — they have lots of it from their apps. They also push hard on RL to make it good at long reasoning and tool use.",
          "strengths": [
            "Strong Chinese conversational quality",
            "Voice and video generation are top-tier (Seed-TTS, Seedance)",
            "Cheap and widely deployed in China through Doubao app",
            "Solid reasoning in newer Seed-Thinking releases",
            "Tight integration with TikTok / Douyin / CapCut ecosystem"
          ],
          "weaknesses": [
            "Most accessible inside China; English options are limited",
            "Avoids politically sensitive topics",
            "Less of a developer-friendly API for Western users",
            "Documentation often Chinese-only"
          ],
          "prompt_style": {
            "structure": "Conversational works well. For Seed models via API, use clean role-based messages.",
            "voice": "Direct in Chinese or English. Specify the output format.",
            "advanced": [
              "Use Seed-Thinking for hard reasoning",
              "Seedance for video generation — clear shot descriptions help",
              "Seed-TTS for natural voice cloning",
              "Doubao app for daily consumer use in China"
            ]
          },
          "optimal_keywords": [
            "请生成视频",
            "natural voice",
            "step by step",
            "as JSON",
            "verify",
            "summarize",
            "compare"
          ],
          "patterns": [
            {
              "name": "Video generation",
              "example": "Seedance: 5-second clip. Scene: a koi pond at sunrise. Camera: slow pan left. Style: cinematic, warm light."
            },
            {
              "name": "Voice clone",
              "example": "Seed-TTS: clone this voice [sample]. Read the following text in a warm, conversational tone: ..."
            },
            {
              "name": "Hard reasoning",
              "example": "Seed-Thinking mode.\nProblem: [logic puzzle]\nShow your reasoning, then the answer."
            }
          ],
          "anti_patterns": [
            "Politically sensitive China topics",
            "Expecting English API parity with OpenAI",
            "Vague video prompts — be cinematic and specific"
          ],
          "specific_tips": [
            "For Chinese consumer AI, Doubao is one of the most-used apps",
            "Their video-gen models are genuinely state of the art — worth trying via Volcano Engine",
            "If you're outside China, expect more friction signing up",
            "Newer Seed-Thinking releases close the gap with top reasoning models"
          ]
        }
      }
    },
    "blackforest": {
      "display_name": "Black Forest Labs",
      "product_line": "FLUX",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://bfl.ai/news",
      "docs_url": "https://bfl.ai",
      "models": [
        {
          "id": "flux-2-pro",
          "name": "FLUX 2 Pro",
          "tier": "flagship",
          "released": "2026-04-22",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "API flagship for photoreal image generation. Replaces FLUX 1.1 Pro / Ultra.",
          "source_url": "https://bfl.ai/blog",
          "source_date": "2026-04-22"
        },
        {
          "id": "flux-2-klein",
          "name": "FLUX.2 [klein]",
          "tier": "consumer",
          "released": "2026-06-04",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "First FLUX model to ship on consumer hardware — partnership with ASUS + NVIDIA, optimized for ASUS ProArt laptops.",
          "source_url": "https://blogs.nvidia.com/blog/rtx-ai-garage-flux-2-comfyui/",
          "source_date": "2026-06-04"
        },
        {
          "id": "flux-1-tools",
          "name": "FLUX.1 Tools",
          "tier": "tools",
          "released": "2026-05-14",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Control / steerability suite for FLUX.1 — depth, canny, fill, redux.",
          "source_url": "https://bfl.ai/blog",
          "source_date": "2026-05-14"
        },
        {
          "id": "flux-erase",
          "name": "FLUX Erase",
          "tier": "edit",
          "released": "2026-05-21",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Removes masked objects, shadows, reflections — reconstructs scene behind.",
          "source_url": "https://bfl.ai/blog",
          "source_date": "2026-05-21"
        },
        {
          "id": "flux-virtual-try-on",
          "name": "FLUX Virtual Try-On",
          "tier": "edit",
          "released": "2026-05-28",
          "context": null,
          "max_output": null,
          "modalities": [
            "image"
          ],
          "open_weights": false,
          "status": "current",
          "pricing": null,
          "best_for": "Garment try-on — sub-4-second generations, garment-consistent across thousands of products.",
          "source_url": "https://bfl.ai/blog",
          "source_date": "2026-05-28"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Black Forest Labs' FLUX 2 Pro — 32B Rectified Flow Transformer paired with a Mistral-3 24B Vision Language Model for prompt understanding. Up to 4MP output, accepts up to 10 reference images per call. Natural-language editing. $0.014/image on the official BFL API.",
          "training": "Founded by ex-Stable Diffusion team. Rectified Flow Transformer architecture (improvement over diffusion) + a dedicated VLM for prompt comprehension. Trained for prompt fidelity, multi-image consistency, and (increasingly) text rendering inside images.",
          "strengths": [
            "32B RFT + Mistral-3 24B VLM = best-in-class prompt fidelity",
            "Up to 10 reference images per call — highest in the field",
            "4MP native output",
            "Natural-language editing (modify in place rather than regenerate)",
            "$0.014/image on BFL API — very competitive",
            "Founded by Stable Diffusion alumni — high technical credibility"
          ],
          "weaknesses": [
            "Image-only — no text, video, or audio products",
            "Pro tier closed weights; only earlier FLUX tiers ship as open-weights for non-commercial use",
            "Less integrated into big-vendor ecosystems (no Bedrock / Vertex first-party)",
            "Smaller team than Imagen / OpenAI"
          ],
          "prompt_style": {
            "structure": "References do heavy lifting — submit up to 10 reference images for character / style / color / layout. Use prose to describe what should vary (action, setting, camera) and references for what should stay locked.",
            "voice": "Concrete and descriptive. Camera, lens, lighting, composition all influence output. Use specific photography vocabulary (35mm, golden hour, shallow depth of field).",
            "advanced": [
              "Multi-image references — 3-5 references typically stabilizes a series; 10 is the ceiling",
              "Natural-language editing — submit prior output + describe the change, don't regenerate",
              "Typography is ~60% first-attempt accuracy — budget 2-3 tries for text-heavy designs"
            ]
          },
          "optimal_keywords": [
            "References: [3-10 images]",
            "Camera: 35mm/50mm/85mm",
            "Lighting: soft north light / golden hour",
            "Style anchor:",
            "Photorealistic /illustrated / minimal",
            "Layout: headline top third, subhead centered",
            "Preserve EXACTLY: [reference details]"
          ],
          "patterns": [
            {
              "name": "Lock with refs, vary with prompt",
              "example": "10 reference images: product (3), style mood (3), color palette (4)\nPrompt varies: action / setting / camera / lighting\nResult: consistent series with desired variation"
            },
            {
              "name": "Natural-language edit",
              "example": "[prior output attached] + 'Change the headline color to navy, keep everything else identical'\n// Edits in place — preserves micro-details"
            },
            {
              "name": "Photorealistic product hero",
              "example": "Editorial product photograph of [X] on [surface] with [lighting]\nCamera: 50mm shallow DOF\nLighting: soft north light\nOutput: 4MP, 16:9\nRefs: product (3), style (2)"
            }
          ],
          "anti_patterns": [
            "Regenerating from scratch when an edit would do (loses micro-detail)",
            "Anchoring a deadline to first-attempt typography (~60% accuracy — budget 2-3 tries)",
            "Submitting 10 references when 3-5 would stabilize the series (wastes latency + cost)",
            "Generic prompts when specific photo/lighting language would work"
          ],
          "specific_tips": [
            "Use multi-image references for consistency — 3-5 typically locks a series",
            "Natural-language editing > regeneration for tweaks",
            "Budget 2-3 tries for complex typography",
            "Use specific photography vocabulary (lens, light direction, composition)",
            "$0.014/image on BFL API is the cheapest direct rate",
            "fal.ai for pay-per-MP if you batch small images"
          ]
        },
        "plain": {
          "what_it_is": "Black Forest Labs makes Flux — the image generator that's become the go-to for photo-realistic and stylized image creation. Flux Kontext is the editing-focused version that lets you tweak existing images with simple instructions.",
          "training": "Flux was built by the team that originally made Stable Diffusion. It uses a newer kind of math (called flow matching) that makes the images sharper and the model faster than older diffusion models.",
          "strengths": [
            "Photo-realistic faces and hands — historically AI's hardest target",
            "Follows the prompt closely instead of doing its own thing",
            "Fast for the quality you get",
            "Open weights (Flux schnell, Flux dev) for the smaller versions",
            "Flux Kontext can edit and preserve characters across images"
          ],
          "weaknesses": [
            "No native text rendering as good as Imagen 4 or Ideogram",
            "The best version (Flux 1.1 Pro / Ultra) is API-only",
            "Less artistic flair than Midjourney out of the box",
            "Subject consistency across multiple images takes effort"
          ],
          "prompt_style": {
            "structure": "Write the prompt as a vivid description, not a list. Subject first, then style, then mood, then lighting.",
            "voice": "Be cinematic and concrete. Name the camera angle, lens, lighting, and mood like a director.",
            "advanced": [
              "For Flux Kontext: provide the source image + a short edit instruction",
              "Use weights in brackets: (subject:1.3) makes that part stronger",
              "Negative prompts aren't really needed — just describe what you want",
              "For consistent characters, use Kontext with a reference image"
            ]
          },
          "optimal_keywords": [
            "photorealistic",
            "cinematic lighting",
            "shallow depth of field",
            "35mm film",
            "golden hour",
            "shot on",
            "studio lit",
            "candid",
            "documentary style"
          ],
          "patterns": [
            {
              "name": "Photoreal portrait",
              "example": "Photorealistic portrait of an elderly fisherman, weathered face, soft natural window light, shallow depth of field, shot on 50mm, muted earth tones"
            },
            {
              "name": "Style + scene",
              "example": "A neon-soaked Tokyo alley at midnight, light rain, reflections on wet pavement, cinematic anamorphic lens flare, Wong Kar-wai style"
            },
            {
              "name": "Edit with Kontext",
              "example": "[input image of a person in a park]\nEdit: change the background to a snowy mountain at sunset. Keep the person's face and pose exactly the same."
            }
          ],
          "anti_patterns": [
            "Writing the prompt as a comma-separated tag list (that's Stable Diffusion style)",
            "Asking it to render long sentences of text inside the image",
            "Vague prompts like 'a cool picture'",
            "Expecting Midjourney-style artistic interpretation"
          ],
          "specific_tips": [
            "Describe like a film director, not like you're tagging Pinterest",
            "For consistent characters, Flux Kontext is the killer feature",
            "If you need text in the image, generate it elsewhere and composite",
            "Pro tier on Replicate / fal.ai is cheap for what you get"
          ]
        }
      }
    },
    "specvideo": {
      "display_name": "Specialized Video",
      "product_line": "Kling + LTX",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://huggingface.co/Lightricks",
      "docs_url": "https://huggingface.co/Lightricks/LTX-2.3",
      "models": [
        {
          "id": "kling-3",
          "name": "Kuaishou Kling 3",
          "tier": "video",
          "released": "2026",
          "context": null,
          "max_output": null,
          "modalities": [
            "video_generation",
            "audio"
          ],
          "open_weights": false,
          "status": "current",
          "best_for": "Best multi-shot character consistency. Cinematic motion physics.",
          "source_url": "https://kling.kuaishou.com",
          "source_date": "2026-03-01"
        },
        {
          "id": "ltx-2.3",
          "name": "Lightricks LTX 2.3",
          "tier": "video",
          "released": "2026-03-05",
          "context": null,
          "max_output": null,
          "modalities": [
            "video_generation",
            "audio"
          ],
          "open_weights": true,
          "status": "current",
          "license": "LTX License",
          "params_total_b": 22,
          "architecture": "Diffusion Transformer",
          "pricing": {
            "input_per_m_usd": null,
            "output_per_m_usd": null,
            "note": "~$0.04/sec hosted"
          },
          "best_for": "Open-source 4K @ 50fps with native audio. FP8 fits 24GB VRAM.",
          "source_url": "https://huggingface.co/Lightricks/LTX-2.3",
          "source_date": "2026-03-05"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Two video specialists — Kuaishou's Kling 3 (best multi-shot character consistency, cinematic motion physics) and Lightricks' LTX 2.3 (22B Diffusion Transformer, native 4K @ 50fps with synchronized audio, FP8 quantized fits 24GB VRAM, ~$0.04/sec hosted, open-source).",
          "training": "Kling 3 — trained on Kuaishou's short-video corpus, optimized for character continuity across shots and physics-grounded motion. LTX 2.3 — 22B DiT trained for joint video+audio generation in a single pass; new VAE produces noticeably sharper textures. LTX weights are open and quantize-friendly.",
          "strengths": [
            "Kling: best multi-shot character consistency in the field",
            "Kling: cinematic motion physics (fabric, water, gestures feel grounded)",
            "LTX: only open-source 4K @ 50fps with native audio",
            "LTX: ~$0.04/sec hosted — cheapest in the field by a margin",
            "LTX: FP8 quantized fits 24GB consumer GPU (4090/5090)",
            "Both: native audio in a single pass"
          ],
          "weaknesses": [
            "Kling: closed weights; Western enterprise procurement caveats",
            "LTX: top-end fidelity below Veo 3 for hero ad creative",
            "Neither has a full-stack ecosystem — pair with another vendor for image / text",
            "Distribution mostly through third-party hosts"
          ],
          "prompt_style": {
            "structure": "For Kling, describe character + shot sequence + per-shot motion. For LTX, describe visual + audio together — they generate jointly.",
            "voice": "Concrete and specific about motion (velocity, weight, direction), camera (locked / push / pan), lighting (soft / dramatic / time-of-day), and mood (precise adjectives).",
            "advanced": [
              "For Kling, use voice reference for character voice; chain extends multi-shot beyond 15s",
              "For LTX, describe audio in the prompt — ambient + foreground",
              "For LTX, self-host the quantized open weights for cost-floor production"
            ]
          },
          "optimal_keywords": [
            "Multi-shot:",
            "Character: [description]",
            "Voice ref: [audio]",
            "Camera: locked / slow push / dolly",
            "Motion: [velocity, weight]",
            "Lighting: [soft / dramatic / golden hour]",
            "4K @ 50fps",
            "Native audio:",
            "Cinematic /grounded physics"
          ],
          "patterns": [
            {
              "name": "Kling multi-shot character clip",
              "example": "Character: [face, build, clothing, distinguishing features]\nVoice ref: [audio]\nShot 1 (0-4s): action + camera angle\nShot 2 (4-8s): different angle, same character\nShot 3 (8-12s): closing emotional beat\nKeep appearance/gait/voice constant across shots"
            },
            {
              "name": "LTX 4K + native audio",
              "example": "Visual: [scene, subject, camera move, lighting]\nAudio: [ambient + foreground — wind, footsteps, dialogue]\nOutput: 4K @ 50fps, native audio\nMake audio match visible motion (footstep on foot-down)"
            },
            {
              "name": "Iterate locked-style (LTX)",
              "example": "Generate 5 variations of the same shot for evaluation\nLocked: aspect ratio, color grade, character, lighting\nVary: ONE thing (camera angle / motion / composition)\nOutput: short 1080p clips for iteration"
            }
          ],
          "anti_patterns": [
            "Iterating at 4K (slow, expensive) — iterate at 1080p, commit at 4K",
            "Not describing audio for LTX (you'll get default audio)",
            "Skipping voice ref for Kling multi-shot dialogue",
            "Treating Kling like generic text-to-video (it shines on character continuity)"
          ],
          "specific_tips": [
            "Kling for narrative / character-driven content; LTX for on-prem and cost-floor",
            "Iterate at 1080p, commit at 4K — saves time and money",
            "Describe audio explicitly for LTX — it generates jointly with video",
            "For long sequences with Kling, chain shot extensions",
            "Self-host LTX FP8 on a 4090/5090 if you have on-prem GPUs",
            "fal.ai / Runware for hosted access to both"
          ]
        },
        "plain": {
          "what_it_is": "This is a grab-bag tab covering the best AI video generators — Sora 2 (OpenAI), Veo 3 (Google), Kling, Runway Gen-4, Seedance, Wan, and Hailuo. Each is good at different things. They generate short clips (5-15 seconds) from a text prompt or a starting image.",
          "training": "These models learned by watching huge libraries of video paired with descriptions. Newer ones also handle physics — they've learned that things fall down, water reflects, hair moves with the wind.",
          "strengths": [
            "Realistic motion and physics in modern models",
            "Image-to-video lets you start from a still and animate it",
            "Some now do sound and dialogue too (Veo 3, Sora 2)",
            "Long-form storytelling tools (Sora's Storyboard, Kling Master)",
            "Quality keeps jumping every few months"
          ],
          "weaknesses": [
            "Still expensive per second compared to other AI work",
            "Hands, multi-person scenes, and long narratives still go wrong",
            "Most clips are 5-15 seconds max",
            "Different models have different strengths — no one model wins everything"
          ],
          "prompt_style": {
            "structure": "Write a shot description like a screenplay direction. Subject, action, setting, camera move, lighting, mood.",
            "voice": "Cinematic and specific. Name the shot type and camera motion.",
            "advanced": [
              "Sora 2 for sound + dialogue + narrative",
              "Veo 3 for cinematic realism with native audio",
              "Kling for fluid motion and 1080p quality",
              "Runway Gen-4 for character consistency across shots",
              "Image-to-video usually beats text-to-video for control"
            ]
          },
          "optimal_keywords": [
            "wide shot",
            "tracking shot",
            "dolly in",
            "handheld",
            "golden hour",
            "cinematic",
            "shallow depth of field",
            "slow motion",
            "natural lighting",
            "shot on 35mm"
          ],
          "patterns": [
            {
              "name": "Cinematic shot",
              "example": "Wide shot, slow dolly forward. A lone surfer paddles out at dawn. Misty, golden light hitting the water. Calm waves. Cinematic, 35mm film look."
            },
            {
              "name": "Image-to-video",
              "example": "[start image]\nAnimate: the woman turns her head slowly toward the camera, hair gently moving in the breeze. Subtle, realistic motion."
            },
            {
              "name": "Action with physics",
              "example": "Medium shot. A glass of red wine tips over on a wooden table. Wine spreads across the surface, picks up reflections from window light. Photorealistic."
            }
          ],
          "anti_patterns": [
            "Trying to fit a whole story in one prompt — stick to one shot",
            "Vague descriptions like 'something cool happens'",
            "Asking for very long clips (most cap at 10-15 seconds)",
            "Mixing too many actions in one shot"
          ],
          "specific_tips": [
            "Storyboard your video shot by shot, generate each, then edit together",
            "Image-to-video gives you way more control than pure text-to-video",
            "Each model has a sweet spot — try the same prompt in two and see",
            "Sound design separately, even if the model can do native audio"
          ]
        }
      }
    },
    "specimage": {
      "display_name": "Specialized Image",
      "product_line": "Z-Image Turbo + Pruna P-Image",
      "verified_at": "2026-06-16",
      "release_notes_url": "https://docs.api.pruna.ai",
      "docs_url": "https://docs.api.pruna.ai",
      "models": [
        {
          "id": "z-image-turbo",
          "name": "Z-Image Turbo",
          "tier": "image",
          "released": "2026",
          "context": null,
          "max_output": null,
          "modalities": [
            "image_generation"
          ],
          "open_weights": true,
          "status": "current",
          "params_total_b": 6,
          "architecture": "S3-DiT",
          "best_for": "Sub-second on 16GB VRAM. Strong English + Chinese typography. 8 inference steps.",
          "source_url": "https://docs.api.pruna.ai",
          "source_date": "2026-02-01"
        }
      ],
      "how_it_works": {
        "technical": {
          "what_it_is": "Z-Image Turbo — 6B-parameter Scalable Single-Stream Diffusion Transformer (S3-DiT), 8 inference steps, sub-second wall clock on a 16GB consumer GPU. Originated at Alibaba's Tongyi-MAI; Pruna AI's optimization engine accelerates it for production. Distinct strength: text rendering in both English and Chinese.",
          "training": "Pretrained as S3-DiT architecture for fast diffusion (8 steps vs 20-50 typical). Pruna's compression pipeline applies pruning, quantization, distillation, and latent caching to preserve quality while shrinking inference cost. LoRA-friendly for style fine-tuning.",
          "strengths": [
            "6B params, 8 inference steps — sub-second wall clock",
            "Runs on 16GB VRAM consumer GPUs",
            "Strong text rendering in BOTH English and Chinese",
            "LoRA-friendly — fine-tune for style with a small dataset",
            "Open weights — fine-tune and self-host",
            "Pruna's optimization platform is the underlying IP — applicable beyond image gen"
          ],
          "weaknesses": [
            "Top-end fidelity below FLUX 2 / Imagen 4 / gpt-image-2",
            "Naming overlap (Z-Image vs Tongyi-MAI's Z-Image) creates confusion",
            "Pruna's productized image-gen tier evolves fast — verify current pricing",
            "Not a maintained 'model family' in the same sense as Qwen Image"
          ],
          "prompt_style": {
            "structure": "Concise, concrete prompts work well — the model is fast so iterate. For typography, specify the exact text. For bilingual content, name both scripts explicitly.",
            "voice": "Direct and descriptive. Style references via LoRA are stronger than verbose style descriptions in the prompt.",
            "advanced": [
              "Use LoRA fine-tuning for consistent style at scale",
              "Iterate interactively — sub-second generation makes real-time UX possible",
              "For bilingual content, name both English and Chinese scripts explicitly"
            ]
          },
          "optimal_keywords": [
            "Bilingual: EN + ZH",
            "Exact text:",
            "LoRA: [style]",
            "Photorealistic / illustrated / minimal",
            "8 steps",
            "Fast iteration",
            "Sub-second"
          ],
          "patterns": [
            {
              "name": "Bilingual typography poster",
              "example": "Generate a poster with:\nEnglish: \"[exact text]\"\nChinese: \"[exact text]\"\nLayout: English headline top, Chinese subhead below\nStyle: [reference / mood]\nRender BOTH scripts with high fidelity"
            },
            {
              "name": "High-volume product variations",
              "example": "Generate 8 variations of the same product shot for catalog\nLocked: product, composition, color tone\nVary: background, lighting angle, prop arrangement\nOutput: 1024×1024 each, optimize for speed"
            },
            {
              "name": "Real-time interactive iteration",
              "example": "I'll describe small adjustments each turn; you regenerate fast\nBaseline: [describe]\nWait for my next instruction; regenerate based on the change"
            }
          ],
          "anti_patterns": [
            "Using verbose style descriptions when a LoRA would work better",
            "Treating it as a top-end hero-asset model (FLUX / Imagen win there)",
            "Skipping the speed advantage — iterate freely, the cost is low"
          ],
          "specific_tips": [
            "Sub-second on 16GB VRAM — use for interactive UX where latency matters",
            "LoRA fine-tune for consistent style at scale (small dataset enough)",
            "Bilingual EN+ZH typography is a distinct strength — use it",
            "8 inference steps means cheap iteration — try 5-10 variants per concept",
            "For top-end hero shots, route to FLUX 2 / Imagen 4 / gpt-image-2 instead",
            "Pruna's optimization platform is the deeper bet — applicable beyond just image gen"
          ]
        },
        "plain": {
          "what_it_is": "This tab covers Z-Image Turbo (Alibaba) and Pruna P-Image — two newer specialized image models that focus on being fast and high-quality without needing massive GPUs. Great if you want quick, sharp images without the cost of Flux Pro.",
          "training": "These models were trained on huge image-caption datasets and then optimized with clever tricks to make them run faster — fewer steps to make an image, smaller memory footprint.",
          "strengths": [
            "Fast — images in seconds, not minutes",
            "Sharp and clean output for the speed",
            "Cheaper to run than Flux Pro",
            "Open weights for some versions, so you can self-host",
            "Good for batch generation when you need many images quickly"
          ],
          "weaknesses": [
            "Less artistic flair than Midjourney or Flux Pro",
            "Hands and faces still sometimes go wrong",
            "Limited text-in-image rendering",
            "Smaller communities than mainstream image models"
          ],
          "prompt_style": {
            "structure": "Vivid descriptive sentences. Lead with the subject, follow with mood, lighting, style.",
            "voice": "Cinematic and specific. Treat the prompt like a quick brief to a photographer.",
            "advanced": [
              "Use few-step inference for speed — these models are tuned for it",
              "Image-to-image variations work well for iterating fast",
              "Combine with LoRA fine-tunes for specific styles",
              "For commercial use, check the license — some are research-only"
            ]
          },
          "optimal_keywords": [
            "photorealistic",
            "cinematic lighting",
            "soft natural light",
            "sharp focus",
            "studio shot",
            "minimalist",
            "isolated on white",
            "product photography"
          ],
          "patterns": [
            {
              "name": "Product shot",
              "example": "Product photo of a sleek black ceramic mug, isolated on white background, soft studio lighting, sharp focus, slight shadow underneath, commercial photography"
            },
            {
              "name": "Quick concept iteration",
              "example": "Cozy reading nook in a forest cabin: armchair, blanket, warm lamp, books on shelf, large window looking onto pine trees. Cinematic, warm tones, golden hour."
            },
            {
              "name": "Batch variations",
              "example": "Generate 4 variations of: minimalist logo for a coffee brand called 'Ember' — simple, hand-drawn flame, earth-tone palette."
            }
          ],
          "anti_patterns": [
            "Expecting Midjourney-level artistic depth",
            "Long sentences of text rendered inside the image",
            "Vague prompts — these models reward specificity",
            "Using max steps when these models are tuned for fast inference"
          ],
          "specific_tips": [
            "These are your tools when you need 100 images, not 1 masterpiece",
            "Pair with an upscaler if you need print resolution",
            "If self-hosting, the speed-per-watt is excellent",
            "For polished single hero images, still reach for Flux or Midjourney"
          ]
        }
      }
    }
  }
}
