{
  "schema_version": "runlocalai-model-intelligence-snapshot-v1",
  "schema_url": "/schemas/runlocalai-model-intelligence-snapshot-v1.json",
  "output_hash_sha256": "7ccf6fea768497c6c2a290c96c44f7620a810c8df5b74b5ec06b956c751e1936",
  "generated_at": "2026-05-28T11:57:26.997Z",
  "generated_by": "scripts/pull-model-intelligence.ts",
  "scope": "model-intelligence-priors",
  "license": "CC-BY-4.0 for RunLocalAI normalization; preserve upstream source licenses and attribution.",
  "attribution": "RunLocalAI normalization over sourced OpenEvals, LMArena, and LiveBench data.",
  "not_local_measurement": true,
  "framework_note": "These scores are external model-intelligence priors for the Will-It-Run framework. They do not claim local fit, local speed, quantized quality, or hardware compatibility.",
  "sources": [
    {
      "source_id": "openevals",
      "label": "OpenEvals leaderboard data",
      "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
      "dataset": "OpenEvals/leaderboard-data",
      "config": "default",
      "split": "train",
      "license_note": "Upstream dataset card currently reports MIT.",
      "fetched_rows": 105,
      "total_rows_reported": 105,
      "captured_at": "2026-05-28T11:57:28.766Z"
    },
    {
      "source_id": "lmarena",
      "label": "LMArena text leaderboard",
      "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
      "dataset": "lmarena-ai/leaderboard-dataset",
      "config": "text",
      "split": "latest",
      "license_note": "Preserve upstream LMArena attribution and methodology notes.",
      "fetched_rows": 1000,
      "total_rows_reported": 8890,
      "sampling_note": "Sequential 1000-row slice over 8890 upstream rows.",
      "captured_at": "2026-05-28T11:57:41.408Z"
    },
    {
      "source_id": "livebench",
      "label": "LiveBench model judgments",
      "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
      "dataset": "livebench/model_judgment",
      "config": "default",
      "split": "leaderboard",
      "license_note": "Preserve upstream LiveBench attribution; scores are aggregated from public judgment rows.",
      "fetched_rows": 1000,
      "total_rows_reported": 60372,
      "sampling_note": "Stratified 1000-row sample over 60372 upstream rows. Set MODEL_INTELLIGENCE_LIVEBENCH_MAX_ROWS to change this cap.",
      "captured_at": "2026-05-28T11:57:53.182Z"
    }
  ],
  "normalization": {
    "model_matching": "Model keys are slugged from upstream names. Aliases preserve original source labels for later catalog reconciliation.",
    "composite": "RunLocalAI intelligence prior normalizes available OpenEvals aggregate, LMArena text rating, and LiveBench aggregate judgment to 0-100, then reweights only the components present.",
    "local_execution_claim": "None. Local execution claims must come from RunLocalAI fit math, owner benchmarks, or reviewed community measurements."
  },
  "models": [
    {
      "model_key": "gpt-4-turbo-2024-04-09",
      "display_name": "gpt-4-turbo-2024-04-09",
      "provider": "openai",
      "aliases": [
        "gpt-4-turbo-2024-04-09"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 76.14,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 65.86,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 94.12,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.901961,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.941176,
          "normalized_0_100": 94.12,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.705882,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1241.2685902841272,
          "normalized_0_100": 62.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 216,
          "lower": 1232.0542956768359,
          "upper": 1250.4828848914185,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1268.8604440450376,
          "normalized_0_100": 65.51,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 228,
          "lower": 1261.761468509028,
          "upper": 1275.9594195810473,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1271.733102895077,
          "normalized_0_100": 65.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 223,
          "lower": 1267.8513527350951,
          "upper": 1275.614853055059,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-0125-preview",
      "display_name": "gpt-4-0125-preview",
      "provider": "openai",
      "aliases": [
        "gpt-4-0125-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.99,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 64.73,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 92.94,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.882353,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.929412,
          "normalized_0_100": 92.94,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.647059,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1236.567122966349,
          "normalized_0_100": 61.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 223,
          "lower": 1226.9824099642183,
          "upper": 1246.1518359684792,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1249.6790078177642,
          "normalized_0_100": 63.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 237,
          "lower": 1242.1100690952053,
          "upper": 1257.2479465403228,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1262.2463171146692,
          "normalized_0_100": 64.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 232,
          "lower": 1258.1758797355537,
          "upper": 1266.316754493785,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-5-sonnet-20241022",
      "display_name": "claude-3-5-sonnet-20241022",
      "provider": "anthropic",
      "aliases": [
        "claude-3-5-sonnet-20241022"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.61,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 68.93,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 84.55,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.861818,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.845455,
          "normalized_0_100": 84.55,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.309091,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1270.5540758154339,
          "normalized_0_100": 65.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 200,
          "lower": 1262.3785911244918,
          "upper": 1278.7295605063762,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1342.8229448516686,
          "normalized_0_100": 74.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 174,
          "lower": 1337.4360285281666,
          "upper": 1348.2098611751708,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1297.613978494011,
          "normalized_0_100": 68.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 196,
          "lower": 1294.4863139934228,
          "upper": 1300.7416429945993,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-sonnet-20240229",
      "display_name": "claude-3-sonnet-20240229",
      "provider": "anthropic",
      "aliases": [
        "claude-3-sonnet-20240229"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.21,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 59.48,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1187.6648137247748,
          "normalized_0_100": 55.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 252,
          "lower": 1178.5921172148935,
          "upper": 1196.737510234656,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1223.2416964116917,
          "normalized_0_100": 60.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 250,
          "lower": 1215.931508789734,
          "upper": 1230.5518840336495,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1218.0828967285254,
          "normalized_0_100": 59.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 255,
          "lower": 1214.1247386719642,
          "upper": 1222.0410547850865,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-5-preview-2025-02-27",
      "display_name": "gpt-4.5-preview-2025-02-27",
      "provider": "openai",
      "aliases": [
        "gpt-4.5-preview-2025-02-27"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.73,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.17,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 57.22,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.465241,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.572193,
          "normalized_0_100": 57.22,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.430481,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1421.1055319430527,
          "normalized_0_100": 83.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 111,
          "lower": 1401.5868087791873,
          "upper": 1440.624255106918,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1397.0817983788056,
          "normalized_0_100": 80.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 124,
          "lower": 1384.144739710054,
          "upper": 1410.018857047557,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1417.3446099906644,
          "normalized_0_100": 83.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 83,
          "lower": 1411.6783794129715,
          "upper": 1423.0108405683573,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-72b-instruct",
      "display_name": "qwen2-72b-instruct",
      "provider": "alibaba",
      "aliases": [
        "Qwen2-72B-Instruct",
        "qwen2-72b-instruct"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.09,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 57.72,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1238.9565933701958,
          "normalized_0_100": 61.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 220,
          "lower": 1227.5542832226495,
          "upper": 1250.358903517742,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1196.3440224935357,
          "normalized_0_100": 56.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 261,
          "lower": 1187.4505852502598,
          "upper": 1205.2374597368116,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1203.3170742116354,
          "normalized_0_100": 57.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 263,
          "lower": 1198.4192389242903,
          "upper": 1208.2149094989807,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-flash-001",
      "display_name": "gemini-2.0-flash-001",
      "provider": "google",
      "aliases": [
        "gemini-2.0-flash-001"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 72.82,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.64,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 67.88,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.550374,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.678839,
          "normalized_0_100": 67.88,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.375936,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1378.5127064490093,
          "normalized_0_100": 78.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 142,
          "lower": 1366.4194802097218,
          "upper": 1390.605932688297,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1351.6277775003718,
          "normalized_0_100": 75.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 169,
          "lower": 1344.4818076172626,
          "upper": 1358.7737473834813,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1354.0071937268474,
          "normalized_0_100": 75.64,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 150,
          "lower": 1350.303163539439,
          "upper": 1357.7112239142557,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-1106-preview",
      "display_name": "gpt-4-1106-preview",
      "provider": "openai",
      "aliases": [
        "gpt-4-1106-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.87,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 64.87,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 84.12,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.735294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.841176,
          "normalized_0_100": 84.12,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.705882,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1241.464501866134,
          "normalized_0_100": 62.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 214,
          "lower": 1231.6731710292424,
          "upper": 1251.2558327030251,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1254.5226910385459,
          "normalized_0_100": 63.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 235,
          "lower": 1247.1613706136363,
          "upper": 1261.8840114634554,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1263.4800949324156,
          "normalized_0_100": 64.87,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 230,
          "lower": 1259.624876427882,
          "upper": 1267.335313436949,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4o-2024-05-13",
      "display_name": "gpt-4o-2024-05-13",
      "provider": "openai",
      "aliases": [
        "gpt-4o-2024-05-13"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.83,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 69.27,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 76.31,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.583333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.803922,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.763072,
          "normalized_0_100": 76.31,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.411765,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1276.2875005052788,
          "normalized_0_100": 66.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 193,
          "lower": 1267.856291055105,
          "upper": 1284.7187099554528,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1297.6590282042594,
          "normalized_0_100": 68.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 203,
          "lower": 1291.2665966490533,
          "upper": 1304.0514597594656,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1300.4826332365842,
          "normalized_0_100": 69.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 192,
          "lower": 1297.0715754509588,
          "upper": 1303.8936910222099,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-5-sonnet-20240620",
      "display_name": "claude-3-5-sonnet-20240620",
      "provider": "anthropic",
      "aliases": [
        "claude-3-5-sonnet-20240620"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.7,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.95,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 80,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.75,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.7,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.8,
          "normalized_0_100": 80,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1263.7508383341942,
          "normalized_0_100": 64.91,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 201,
          "lower": 1254.6489931237325,
          "upper": 1272.8526835446555,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1307.0278684642378,
          "normalized_0_100": 70.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 198,
          "lower": 1300.050050471743,
          "upper": 1314.005686456733,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1280.9606550719923,
          "normalized_0_100": 66.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 215,
          "lower": 1277.5622240072719,
          "upper": 1284.3590861367124,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-0324",
      "display_name": "deepseek-v3-0324",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3-0324"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.3,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.11,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 59.39,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.540909,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.593939,
          "normalized_0_100": 59.39,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.163636,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1388.9944349990703,
          "normalized_0_100": 79.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 133,
          "lower": 1377.0150344948613,
          "upper": 1400.9738355032794,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1368.806257121475,
          "normalized_0_100": 77.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 149,
          "lower": 1362.056593969391,
          "upper": 1375.5559202735592,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1374.7598197160746,
          "normalized_0_100": 78.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 128,
          "lower": 1370.9299575946047,
          "upper": 1378.5896818375445,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-r1",
      "display_name": "deepseek-r1",
      "provider": "deepseek",
      "aliases": [
        "deepseek-r1"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.03,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.86,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 59.09,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.536364,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.590909,
          "normalized_0_100": 59.09,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.145455,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1400.0783556516842,
          "normalized_0_100": 81.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 125,
          "lower": 1382.0715822049187,
          "upper": 1418.0851290984497,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1371.9179532781072,
          "normalized_0_100": 77.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 147,
          "lower": 1360.183014038527,
          "upper": 1383.6528925176872,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1372.6669569821809,
          "normalized_0_100": 77.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 130,
          "lower": 1367.8448584496991,
          "upper": 1377.4890555146626,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwq-32b",
      "display_name": "qwq-32b",
      "provider": "alibaba",
      "aliases": [
        "qwq-32b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.79,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 72.69,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 64.72,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.470863,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.647242,
          "normalized_0_100": 64.72,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.294484,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1378.4354734395074,
          "normalized_0_100": 78.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 143,
          "lower": 1362.5116578588493,
          "upper": 1394.3592890201655,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1334.4376106749503,
          "normalized_0_100": 73.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 178,
          "lower": 1325.47431753965,
          "upper": 1343.4009038102506,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1329.1813396981813,
          "normalized_0_100": 72.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 175,
          "lower": 1324.7749267584948,
          "upper": 1333.5877526378677,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-flash-002",
      "display_name": "gemini-1.5-flash-002",
      "provider": "google",
      "aliases": [
        "gemini-1.5-flash-002"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.07,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.64,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 71.57,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.431373,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.715686,
          "normalized_0_100": 71.57,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.294118,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1293.8889127352675,
          "normalized_0_100": 68.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 189,
          "lower": 1282.6213591322542,
          "upper": 1305.156466338281,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1262.041457564468,
          "normalized_0_100": 64.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 232,
          "lower": 1254.083123892128,
          "upper": 1269.9997912368078,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1286.6970026606846,
          "normalized_0_100": 67.64,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 206,
          "lower": 1282.5228723993457,
          "upper": 1290.8711329220232,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3",
      "display_name": "deepseek-v3",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3"
      ],
      "openness": null,
      "license": "DeepSeek",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 68.65,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.08,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 60.91,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.563636,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.609091,
          "normalized_0_100": 60.91,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.254545,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1337.7020397142833,
          "normalized_0_100": 73.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 166,
          "lower": 1322.451070169414,
          "upper": 1352.9530092591526,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1325.496890806492,
          "normalized_0_100": 72.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 182,
          "lower": 1315.4404378250392,
          "upper": 1335.5533437879444,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1332.5206456619562,
          "normalized_0_100": 73.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 170,
          "lower": 1327.8117305445394,
          "upper": 1337.2295607793733,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-opus-20240229",
      "display_name": "claude-3-opus-20240229",
      "provider": "anthropic",
      "aliases": [
        "claude-3-opus-20240229"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.69,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 64.7,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 72.93,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.7,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.632727,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.729293,
          "normalized_0_100": 72.93,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.163636,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1246.812730637115,
          "normalized_0_100": 62.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 210,
          "lower": 1239.0782657832758,
          "upper": 1254.5471954909542,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1264.7077632786823,
          "normalized_0_100": 65.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 231,
          "lower": 1258.8836579870974,
          "upper": 1270.531868570267,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1262.0526881402443,
          "normalized_0_100": 64.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 233,
          "lower": 1259.040951375352,
          "upper": 1265.0644249051365,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-flash-lite-preview-02-05",
      "display_name": "gemini-2.0-flash-lite-preview-02-05",
      "provider": "google",
      "aliases": [
        "gemini-2.0-flash-lite-preview-02-05"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.84,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 72.73,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 56.52,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.531337,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.565241,
          "normalized_0_100": 56.52,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.328342,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1338.4678405096233,
          "normalized_0_100": 73.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 165,
          "lower": 1323.638075317689,
          "upper": 1353.2976057015576,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1322.493174562972,
          "normalized_0_100": 71.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 184,
          "lower": 1312.8119683202376,
          "upper": 1332.1743808057063,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1329.5613908260248,
          "normalized_0_100": 72.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 174,
          "lower": 1325.295520135935,
          "upper": 1333.8272615161145,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-pro-002",
      "display_name": "gemini-1.5-pro-002",
      "provider": "google",
      "aliases": [
        "gemini-1.5-pro-002"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.83,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.5,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 58.66,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.379947,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.586631,
          "normalized_0_100": 58.66,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.38984,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1330.4559784435546,
          "normalized_0_100": 72.84,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 168,
          "lower": 1320.7584643613536,
          "upper": 1340.1534925257556,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1294.846334624354,
          "normalized_0_100": 68.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 205,
          "lower": 1288.082938906124,
          "upper": 1301.6097303425845,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1319.1675068563404,
          "normalized_0_100": 71.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 181,
          "lower": 1315.8266842838957,
          "upper": 1322.508329428785,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-5-max",
      "display_name": "qwen2.5-max",
      "provider": "alibaba",
      "aliases": [
        "qwen2.5-max"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.83,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.13,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 48.81,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.416701,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.48812,
          "normalized_0_100": 48.81,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.361169,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1379.899594008577,
          "normalized_0_100": 78.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 139,
          "lower": 1366.8107381402979,
          "upper": 1392.9884498768563,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1359.9259327652244,
          "normalized_0_100": 76.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 165,
          "lower": 1351.808191753376,
          "upper": 1368.0436737770724,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1366.5342028237467,
          "normalized_0_100": 77.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 136,
          "lower": 1362.480936728285,
          "upper": 1370.5874689192083,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-110b-chat",
      "display_name": "qwen1.5-110b-chat",
      "provider": "alibaba",
      "aliases": [
        "Qwen1.5-110B-Chat",
        "qwen1.5-110b-chat"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.38,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 54.31,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 87.5,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.875,
          "normalized_0_100": 87.5,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1204.8284679424569,
          "normalized_0_100": 57.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 244,
          "lower": 1191.7066355102525,
          "upper": 1217.950300374661,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1184.4279032624577,
          "normalized_0_100": 55.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 268,
          "lower": 1174.1548583630813,
          "upper": 1194.7009481618343,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1174.6147916917223,
          "normalized_0_100": 54.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 275,
          "lower": 1169.0922269775,
          "upper": 1180.1373564059445,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-3-27b-it",
      "display_name": "gemma-3-27b-it",
      "provider": "google",
      "aliases": [
        "gemma-3-27b-it"
      ],
      "openness": null,
      "license": "Gemma",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.01,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 76.13,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 48.31,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.374599,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.483066,
          "normalized_0_100": 48.31,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.249198,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1346.5579279995238,
          "normalized_0_100": 74.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 161,
          "lower": 1334.0653425241326,
          "upper": 1359.050513474915,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1322.554407777167,
          "normalized_0_100": 71.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 183,
          "lower": 1315.7357743432844,
          "upper": 1329.3730412110492,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1358.142665162245,
          "normalized_0_100": 76.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 146,
          "lower": 1354.5227753825588,
          "upper": 1361.762554941931,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-72b-chat",
      "display_name": "qwen1.5-72b-chat",
      "provider": "alibaba",
      "aliases": [
        "Qwen1.5-72B-Chat",
        "qwen1.5-72b-chat"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 65.74,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 53.31,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 87.5,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.875,
          "normalized_0_100": 87.5,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1185.2152926308627,
          "normalized_0_100": 55.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 253,
          "lower": 1173.064002518896,
          "upper": 1197.3665827428295,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1165.9477037926076,
          "normalized_0_100": 53.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 277,
          "lower": 1156.3877714585747,
          "upper": 1175.5076361266406,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1166.195991115449,
          "normalized_0_100": 53.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 278,
          "lower": 1161.0009065439738,
          "upper": 1171.391075686924,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-pro-001",
      "display_name": "gemini-1.5-pro-001",
      "provider": "google",
      "aliases": [
        "gemini-1.5-pro-001"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 65.5,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.07,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 64.51,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.7,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.490196,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.645098,
          "normalized_0_100": 64.51,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1273.8029889153936,
          "normalized_0_100": 66.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 195,
          "lower": 1264.1203707778968,
          "upper": 1283.4856070528904,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1266.8163379640084,
          "normalized_0_100": 65.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 230,
          "lower": 1259.363971063385,
          "upper": 1274.268704864632,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1273.5386105466528,
          "normalized_0_100": 66.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 222,
          "lower": 1269.583347264473,
          "upper": 1277.4938738288324,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-3-5-turbo-0125",
      "display_name": "gpt-3.5-turbo-0125",
      "provider": "openai",
      "aliases": [
        "gpt-3.5-turbo-0125"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 62.46,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 48.4,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 87.06,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.784314,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.870588,
          "normalized_0_100": 87.06,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1074.3681963197978,
          "normalized_0_100": 42.39,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 288,
          "lower": 1064.1723951127603,
          "upper": 1084.5639975268355,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1136.9401296707322,
          "normalized_0_100": 49.83,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 289,
          "lower": 1128.8494001981635,
          "upper": 1145.0308591433006,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1124.9639882309173,
          "normalized_0_100": 48.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 298,
          "lower": 1120.2915572148772,
          "upper": 1129.6364192469575,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-5-haiku-20241022",
      "display_name": "claude-3-5-haiku-20241022",
      "provider": "anthropic",
      "aliases": [
        "claude-3-5-haiku-20241022"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 62.19,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 63.91,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 59.19,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.6,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.425455,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.591919,
          "normalized_0_100": 59.19,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.127273,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1224.202210520571,
          "normalized_0_100": 60.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 231,
          "lower": 1214.5720610001572,
          "upper": 1233.8323600409844,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1286.792930587169,
          "normalized_0_100": 67.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 214,
          "lower": 1280.9059283136537,
          "upper": 1292.6799328606844,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1255.3755713279434,
          "normalized_0_100": 63.91,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 237,
          "lower": 1252.198045333188,
          "upper": 1258.5530973226987,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-0613",
      "display_name": "gpt-4-0613",
      "provider": "openai",
      "aliases": [
        "gpt-4-0613"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 62.16,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 55.67,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 73.53,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.803922,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.735294,
          "normalized_0_100": 73.53,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.411765,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1135.1987364534589,
          "normalized_0_100": 49.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 270,
          "lower": 1124.9693422777375,
          "upper": 1145.4281306291803,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1187.9125828015804,
          "normalized_0_100": 55.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 265,
          "lower": 1179.9426215882027,
          "upper": 1195.882544014958,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1186.0505011303167,
          "normalized_0_100": 55.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 272,
          "lower": 1182.0160648777826,
          "upper": 1190.084937382851,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-haiku-20240307",
      "display_name": "claude-3-haiku-20240307",
      "provider": "anthropic",
      "aliases": [
        "claude-3-haiku-20240307"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 62.14,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 56.7,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 71.67,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.716667,
          "normalized_0_100": 71.67,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1153.762362976252,
          "normalized_0_100": 51.83,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 262,
          "lower": 1145.0620408854745,
          "upper": 1162.4626850670297,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1199.4971964183933,
          "normalized_0_100": 57.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 259,
          "lower": 1192.6547698172758,
          "upper": 1206.3396230195108,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1194.7225057168382,
          "normalized_0_100": 56.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 265,
          "lower": 1190.997793353255,
          "upper": 1198.4472180804216,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-2-27b-it",
      "display_name": "gemma-2-27b-it",
      "provider": "google",
      "aliases": [
        "gemma-2-27b-it"
      ],
      "openness": null,
      "license": "Gemma license",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 61.38,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 61.06,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 61.95,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.51508,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.619489,
          "normalized_0_100": 61.95,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.287701,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1220.0612986909473,
          "normalized_0_100": 59.71,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 234,
          "lower": 1211.4937282123524,
          "upper": 1228.6288691695424,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1211.4384147773749,
          "normalized_0_100": 58.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 253,
          "lower": 1205.122868492971,
          "upper": 1217.7539610617785,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1231.4165176608817,
          "normalized_0_100": 61.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 244,
          "lower": 1228.0801433908473,
          "upper": 1234.7528919309163,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "command-a-03-2025",
      "display_name": "command-a-03-2025",
      "provider": "cohere",
      "aliases": [
        "command-a-03-2025"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.74,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 72.94,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 39.39,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.393939,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.393939,
          "normalized_0_100": 39.39,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.181818,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1323.5805706189212,
          "normalized_0_100": 72.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 174,
          "lower": 1312.3761082274823,
          "upper": 1334.7850330103604,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1331.109333031995,
          "normalized_0_100": 72.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 179,
          "lower": 1324.9213410058926,
          "upper": 1337.2973250580972,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1331.306300312117,
          "normalized_0_100": 72.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 172,
          "lower": 1327.883213119835,
          "upper": 1334.7293875043988,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-4-maverick-17b-128e-instruct",
      "display_name": "llama-4-maverick-17b-128e-instruct",
      "provider": "meta",
      "aliases": [
        "llama-4-maverick-17b-128e-instruct"
      ],
      "openness": null,
      "license": "Llama 4",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.3,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.75,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 47.25,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.367019,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.472516,
          "normalized_0_100": 47.25,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.278366,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1272.5524536342857,
          "normalized_0_100": 65.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 197,
          "lower": 1259.2325558984885,
          "upper": 1285.872351370083,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1303.0411705944844,
          "normalized_0_100": 69.58,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 201,
          "lower": 1295.6127260648354,
          "upper": 1310.469615124133,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1287.6821849365515,
          "normalized_0_100": 67.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 204,
          "lower": 1283.4583883163812,
          "upper": 1291.905981556722,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-pro-v1-0",
      "display_name": "amazon-nova-pro-v1.0",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-pro-v1.0",
        "amazon.nova-pro-v1:0"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.27,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 64.31,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 53.19,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.355892,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.531938,
          "normalized_0_100": 53.19,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.211785,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1243.106213271401,
          "normalized_0_100": 62.45,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 213,
          "lower": 1228.9036583167751,
          "upper": 1257.308768226027,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1270.4300795967488,
          "normalized_0_100": 65.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 223,
          "lower": 1261.4132236584724,
          "upper": 1279.4469355350254,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1258.7054621264192,
          "normalized_0_100": 64.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 235,
          "lower": 1254.1875701354152,
          "upper": 1263.223354117423,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "starling-lm-7b-beta",
      "display_name": "starling-lm-7b-beta",
      "provider": null,
      "aliases": [
        "Starling-LM-7B-beta",
        "starling-lm-7b-beta"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 58.61,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 49.25,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 75,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.75,
          "normalized_0_100": 75,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1113.0088066007875,
          "normalized_0_100": 46.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 279,
          "lower": 1098.3183200488043,
          "upper": 1127.6992931527707,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1143.1797755340792,
          "normalized_0_100": 50.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 287,
          "lower": 1130.4600772837298,
          "upper": 1155.8994737844287,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1132.0864013484243,
          "normalized_0_100": 49.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 292,
          "lower": 1124.7843678308748,
          "upper": 1139.3884348659737,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-flash-001",
      "display_name": "gemini-1.5-flash-001",
      "provider": "google",
      "aliases": [
        "gemini-1.5-flash-001"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 57.91,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 62,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 50.76,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.6,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.45098,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.507563,
          "normalized_0_100": 50.76,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1233.265670744176,
          "normalized_0_100": 61.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 225,
          "lower": 1223.287030142269,
          "upper": 1243.2443113460831,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1236.1765418891341,
          "normalized_0_100": 61.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 241,
          "lower": 1228.5847589829596,
          "upper": 1243.768324795309,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1239.345929661898,
          "normalized_0_100": 62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 241,
          "lower": 1234.8897746002174,
          "upper": 1243.8020847235784,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4o-mini-2024-07-18",
      "display_name": "gpt-4o-mini-2024-07-18",
      "provider": "openai",
      "aliases": [
        "gpt-4o-mini-2024-07-18"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 57.9,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.61,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 40.91,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.269733,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.409111,
          "normalized_0_100": 40.91,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.174332,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1263.0920125414254,
          "normalized_0_100": 64.83,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 202,
          "lower": 1254.1410583909533,
          "upper": 1272.0429666918976,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1290.2157854091652,
          "normalized_0_100": 68.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 209,
          "lower": 1283.746444651411,
          "upper": 1296.6851261669194,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1286.4697778495058,
          "normalized_0_100": 67.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 207,
          "lower": 1282.9873898798776,
          "upper": 1289.9521658191338,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-lite-v1-0",
      "display_name": "amazon-nova-lite-v1.0",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-lite-v1.0",
        "amazon.nova-lite-v1:0"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 57.16,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.72,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 50.94,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.291414,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.50938,
          "normalized_0_100": 50.94,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.082828,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1224.70433995851,
          "normalized_0_100": 60.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 229,
          "lower": 1208.7328467507255,
          "upper": 1240.6758331662945,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1239.323393676761,
          "normalized_0_100": 62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 240,
          "lower": 1229.098354955601,
          "upper": 1249.5484323979213,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1228.5164532780122,
          "normalized_0_100": 60.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 247,
          "lower": 1223.389039727084,
          "upper": 1233.6438668289402,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-nemotron-70b-instruct",
      "display_name": "llama-3.1-nemotron-70b-instruct",
      "provider": "nvidia",
      "aliases": [
        "llama-3.1-nemotron-70b-instruct"
      ],
      "openness": null,
      "license": "Llama 3.1",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 57,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.16,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 39.22,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.117647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.392157,
          "normalized_0_100": 39.22,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1262.048280267259,
          "normalized_0_100": 64.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 203,
          "lower": 1240.8910241162857,
          "upper": 1283.2055364182324,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1272.2952516265382,
          "normalized_0_100": 65.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 222,
          "lower": 1257.0353641352722,
          "upper": 1287.5551391178044,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1282.6913916850524,
          "normalized_0_100": 67.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 210,
          "lower": 1274.9851730306098,
          "upper": 1290.397610339495,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4o-2024-08-06",
      "display_name": "gpt-4o-2024-08-06",
      "provider": "openai",
      "aliases": [
        "gpt-4o-2024-08-06"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 55.34,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.15,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 34.68,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.357647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.346841,
          "normalized_0_100": 34.68,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.394118,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1253.1848238774276,
          "normalized_0_100": 63.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 205,
          "lower": 1242.6795788666877,
          "upper": 1263.6900688881674,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1284.0341389757498,
          "normalized_0_100": 67.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 217,
          "lower": 1276.1954269445403,
          "upper": 1291.8728510069595,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1282.5872296851026,
          "normalized_0_100": 67.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 211,
          "lower": 1278.41666591467,
          "upper": 1286.7577934555352,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-7b-chat",
      "display_name": "qwen1.5-7b-chat",
      "provider": "alibaba",
      "aliases": [
        "Qwen1.5-7B-Chat",
        "qwen1.5-7b-chat"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 54.94,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 43.47,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 75,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.75,
          "normalized_0_100": 75,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1139.5887021493472,
          "normalized_0_100": 50.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 268,
          "lower": 1111.0946626629343,
          "upper": 1168.0827416357602,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1107.8171924416972,
          "normalized_0_100": 46.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 302,
          "lower": 1087.395232021022,
          "upper": 1128.2391528623725,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1083.5020949822524,
          "normalized_0_100": 43.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 319,
          "lower": 1073.7839321541946,
          "upper": 1093.2202578103106,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mixtral-8x22b-instruct-v0-1",
      "display_name": "mixtral-8x22b-instruct-v0.1",
      "provider": "mistral",
      "aliases": [
        "Mixtral-8x22B-Instruct-v0.1",
        "mixtral-8x22b-instruct-v0.1"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 53.95,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 52.81,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 55.95,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.119048,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.559524,
          "normalized_0_100": 55.95,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.238095,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1115.4050387986126,
          "normalized_0_100": 47.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 277,
          "lower": 1104.4517105297123,
          "upper": 1126.358367067513,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1166.1194692753938,
          "normalized_0_100": 53.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 276,
          "lower": 1157.5591540963374,
          "upper": 1174.67978445445,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1162.030796206934,
          "normalized_0_100": 52.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 283,
          "lower": 1157.515730552575,
          "upper": 1166.545861861293,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-large-2407",
      "display_name": "mistral-large-2407",
      "provider": "mistral",
      "aliases": [
        "mistral-large-2407"
      ],
      "openness": null,
      "license": "Mistral Research",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 52.43,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 65.2,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 30.07,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.156863,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.300654,
          "normalized_0_100": 30.07,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1239.4126952925521,
          "normalized_0_100": 62.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 219,
          "lower": 1228.9401791423847,
          "upper": 1249.8852114427193,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1277.1478143799989,
          "normalized_0_100": 66.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 219,
          "lower": 1269.3677211213026,
          "upper": 1284.9279076386952,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1266.1858797082655,
          "normalized_0_100": 65.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 227,
          "lower": 1262.2981816568767,
          "upper": 1270.0735777596542,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "command-r-plus-08-2024",
      "display_name": "command-r-plus-08-2024",
      "provider": "cohere",
      "aliases": [
        "command-r-plus-08-2024"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 52.14,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.77,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 37.05,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.429091,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.370455,
          "normalized_0_100": 37.05,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.145455,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1225.4582955207813,
          "normalized_0_100": 60.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 228,
          "lower": 1205.9002085023617,
          "upper": 1245.016382539201,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1187.5997753435392,
          "normalized_0_100": 55.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 266,
          "lower": 1173.8320000535984,
          "upper": 1201.3675506334798,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1228.9431142961073,
          "normalized_0_100": 60.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 246,
          "lower": 1222.4037176214592,
          "upper": 1235.4825109707554,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-2-9b-it",
      "display_name": "gemma-2-9b-it",
      "provider": "google",
      "aliases": [
        "gemma-2-9b-it"
      ],
      "openness": null,
      "license": "Gemma license",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 50.73,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 58.21,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 37.65,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.7,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.411765,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.376471,
          "normalized_0_100": 37.65,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.235294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1184.0390792818607,
          "normalized_0_100": 55.43,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 254,
          "lower": 1174.5048819812473,
          "upper": 1193.5732765824744,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1174.01145762174,
          "normalized_0_100": 54.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 272,
          "lower": 1166.8475299951576,
          "upper": 1181.1753852483223,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1207.4304048045037,
          "normalized_0_100": 58.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 260,
          "lower": 1203.658704166398,
          "upper": 1211.2021054426095,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mixtral-8x7b-instruct-v0-1",
      "display_name": "mixtral-8x7b-instruct-v0.1",
      "provider": "mistral",
      "aliases": [
        "Mixtral-8x7B-Instruct-v0.1",
        "mixtral-8x7b-instruct-v0.1"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 50.35,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 49.19,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 52.38,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.547619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.52381,
          "normalized_0_100": 52.38,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1054.4203605687726,
          "normalized_0_100": 40.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 297,
          "lower": 1044.0997937516242,
          "upper": 1064.740927385921,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1126.896930230774,
          "normalized_0_100": 48.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 294,
          "lower": 1118.8165242674468,
          "upper": 1134.977336194101,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1131.6018074452425,
          "normalized_0_100": 49.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 293,
          "lower": 1127.3677895396966,
          "upper": 1135.8358253507881,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-micro-v1-0",
      "display_name": "amazon-nova-micro-v1.0",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-micro-v1.0",
        "amazon.nova-micro-v1:0"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 50.22,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 58.33,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 36.02,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.305387,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.360221,
          "normalized_0_100": 36.02,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.110774,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1208.7389375435855,
          "normalized_0_100": 58.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 243,
          "lower": 1192.5958353607507,
          "upper": 1224.8820397264199,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1218.2456229593058,
          "normalized_0_100": 59.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 251,
          "lower": 1207.969996233106,
          "upper": 1228.5212496855054,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1208.4519955085307,
          "normalized_0_100": 58.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 259,
          "lower": 1203.3213538130722,
          "upper": 1213.5826372039892,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "command-r-08-2024",
      "display_name": "command-r-08-2024",
      "provider": "cohere",
      "aliases": [
        "command-r-08-2024"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 49.68,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 55.83,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 38.91,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.518182,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.389091,
          "normalized_0_100": 38.91,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.090909,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1181.2759068144528,
          "normalized_0_100": 55.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 256,
          "lower": 1161.3583749371978,
          "upper": 1201.1934386917078,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1169.9518803042863,
          "normalized_0_100": 53.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 274,
          "lower": 1156.5850593352739,
          "upper": 1183.3187012732988,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1187.40349011732,
          "normalized_0_100": 55.83,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 269,
          "lower": 1180.8854434152472,
          "upper": 1193.9215368193927,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-3-5-turbo-1106",
      "display_name": "gpt-3.5-turbo-1106",
      "provider": "openai",
      "aliases": [
        "gpt-3.5-turbo-1106"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 48.37,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 44.74,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 54.71,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.245098,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.547059,
          "normalized_0_100": 54.71,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.235294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1011.17775437571,
          "normalized_0_100": 34.87,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 312,
          "lower": 986.4587003770027,
          "upper": 1035.8968083744173,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1116.3077107943595,
          "normalized_0_100": 47.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 296,
          "lower": 1100.7106032714091,
          "upper": 1131.9048183173102,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1094.163362887946,
          "normalized_0_100": 44.74,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 314,
          "lower": 1085.4128286988514,
          "upper": 1102.9138970770407,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-3-12b-it",
      "display_name": "gemma-3-12b-it",
      "provider": "google",
      "aliases": [
        "gemma-3-12b-it"
      ],
      "openness": null,
      "license": "Gemma",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 48.29,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.28,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 4.55,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.045455,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.045455,
          "normalized_0_100": 4.55,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.090909,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1281.7349867854796,
          "normalized_0_100": 67.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 218,
          "lower": 1258.850436914761,
          "upper": 1304.6195366561983,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1334.167096409418,
          "normalized_0_100": 73.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 167,
          "lower": 1324.7875197279554,
          "upper": 1343.5466730908806,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-large-2411",
      "display_name": "mistral-large-2411",
      "provider": "mistral",
      "aliases": [
        "mistral-large-2411"
      ],
      "openness": null,
      "license": "MRL",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 47.8,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 65.09,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 17.53,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.1787,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.175262,
          "normalized_0_100": 17.53,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.297833,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1238.6007504606332,
          "normalized_0_100": 61.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 221,
          "lower": 1225.1975462106304,
          "upper": 1252.0039547106358,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1275.9695730884437,
          "normalized_0_100": 66.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 221,
          "lower": 1267.1838156776023,
          "upper": 1284.7553304992853,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1265.3225470522218,
          "normalized_0_100": 65.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 228,
          "lower": 1260.9186110404207,
          "upper": 1269.726483064023,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-large-2402",
      "display_name": "mistral-large-2402",
      "provider": "mistral",
      "aliases": [
        "mistral-large-2402"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 45.4,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 54.53,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 29.41,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.156863,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.294118,
          "normalized_0_100": 29.41,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1119.3477238532585,
          "normalized_0_100": 47.74,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 276,
          "lower": 1108.8349998661665,
          "upper": 1129.8604478403504,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1183.4563406991533,
          "normalized_0_100": 55.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 270,
          "lower": 1174.9679730134956,
          "upper": 1191.944708384811,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1176.5212804324033,
          "normalized_0_100": 54.53,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 274,
          "lower": 1171.8320254881062,
          "upper": 1181.2105353767001,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-small-8k-instruct",
      "display_name": "phi-3-small-8k-instruct",
      "provider": "microsoft",
      "aliases": [
        "Phi-3-small-8k-instruct",
        "phi-3-small-8k-instruct"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 45.25,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 46.61,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 42.86,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.428571,
          "normalized_0_100": 42.86,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": 33.33,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1059.8102275908716,
          "normalized_0_100": 40.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 295,
          "lower": 1045.907422297144,
          "upper": 1073.7130328845994,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1101.9307900448212,
          "normalized_0_100": 45.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 304,
          "lower": 1090.3794834498533,
          "upper": 1113.4820966397892,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1109.8660731117284,
          "normalized_0_100": 46.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 304,
          "lower": 1104.0123312233357,
          "upper": 1115.7198150001211,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-3-4b-it",
      "display_name": "gemma-3-4b-it",
      "provider": "google",
      "aliases": [
        "gemma-3-4b-it"
      ],
      "openness": null,
      "license": "Gemma",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 45,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 68.12,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 4.55,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.045455,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.045455,
          "normalized_0_100": 4.55,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.090909,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1230.804155814666,
          "normalized_0_100": 60.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 245,
          "lower": 1207.311088686345,
          "upper": 1254.297222942987,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1290.7590015825526,
          "normalized_0_100": 68.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 201,
          "lower": 1281.5267102163564,
          "upper": 1299.9912929487487,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-7b-instruct-v0-2",
      "display_name": "mistral-7b-instruct-v0.2",
      "provider": "mistral",
      "aliases": [
        "Mistral-7B-Instruct-v0.2",
        "mistral-7b-instruct-v0.2"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 43.36,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 44.2,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 41.9,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.419048,
          "normalized_0_100": 41.9,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1008.3697855494333,
          "normalized_0_100": 34.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 314,
          "lower": 993.3391250882007,
          "upper": 1023.4004460106661,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1089.6072904190728,
          "normalized_0_100": 44.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 317,
          "lower": 1083.0526047117078,
          "upper": 1096.1619761264378,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-medium-4k-instruct",
      "display_name": "phi-3-medium-4k-instruct",
      "provider": "microsoft",
      "aliases": [
        "Phi-3-medium-4k-instruct",
        "phi-3-medium-4k-instruct"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 42.64,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 49.9,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 29.93,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.29932,
          "normalized_0_100": 29.93,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": 33.33,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1107.1608217948453,
          "normalized_0_100": 46.29,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 281,
          "lower": 1094.4616135228976,
          "upper": 1119.8600300667931,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1130.643615054963,
          "normalized_0_100": 49.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 291,
          "lower": 1120.4537065193767,
          "upper": 1140.833523590549,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1137.5940410703095,
          "normalized_0_100": 49.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 290,
          "lower": 1132.4762250930742,
          "upper": 1142.7118570475448,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-1-1-7b-it",
      "display_name": "gemma-1.1-7b-it",
      "provider": "google",
      "aliases": [
        "gemma-1.1-7b-it"
      ],
      "openness": null,
      "license": "Gemma license",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 42,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 44.72,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 37.25,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.078431,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.372549,
          "normalized_0_100": 37.25,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.235294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1059.6138995006474,
          "normalized_0_100": 40.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 296,
          "lower": 1047.2338512808926,
          "upper": 1071.9939477204025,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1094.0150957304422,
          "normalized_0_100": 44.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 315,
          "lower": 1088.005791549252,
          "upper": 1100.0243999116326,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-5-coder-32b-instruct",
      "display_name": "qwen2.5-coder-32b-instruct",
      "provider": "alibaba",
      "aliases": [
        "Qwen2.5-Coder-32B-Instruct",
        "qwen2.5-coder-32b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.91,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.9,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 5.93,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.05933,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.05933,
          "normalized_0_100": 5.93,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.716667,
          "normalized_0_100": 71.67,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.088995,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1220.8954467409915,
          "normalized_0_100": 59.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 233,
          "lower": 1196.5849205903146,
          "upper": 1245.2059728916684,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1276.152977411752,
          "normalized_0_100": 66.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 220,
          "lower": 1257.9592580644346,
          "upper": 1294.3466967590693,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1230.0935922153328,
          "normalized_0_100": 60.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 245,
          "lower": 1222.04712473551,
          "upper": 1238.1400596951557,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-4",
      "display_name": "phi-4",
      "provider": "microsoft",
      "aliases": [
        "phi-4"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.46,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 59.31,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 7.46,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.06874,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.074577,
          "normalized_0_100": 7.46,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.10311,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1211.287629577404,
          "normalized_0_100": 58.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 241,
          "lower": 1195.8283175518982,
          "upper": 1226.7469416029098,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1231.8463379683633,
          "normalized_0_100": 61.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 243,
          "lower": 1222.0358249148658,
          "upper": 1241.6568510218608,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1216.7136091277503,
          "normalized_0_100": 59.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 258,
          "lower": 1212.1587644294773,
          "upper": 1221.2684538260235,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-flash-8b-001",
      "display_name": "gemini-1.5-flash-8b-001",
      "provider": "google",
      "aliases": [
        "gemini-1.5-flash-8b-001"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.21,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.41,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 4.85,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.048485,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.048485,
          "normalized_0_100": 4.85,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.145455,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1229.9323832109014,
          "normalized_0_100": 60.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 226,
          "lower": 1218.459472217634,
          "upper": 1241.405294204169,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1218.1539624083366,
          "normalized_0_100": 59.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 252,
          "lower": 1210.1462881482566,
          "upper": 1226.161636668417,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1225.9046827808065,
          "normalized_0_100": 60.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 251,
          "lower": 1221.6507803210316,
          "upper": 1230.1585852405815,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "openhermes-2-5-mistral-7b",
      "display_name": "openhermes-2.5-mistral-7b",
      "provider": null,
      "aliases": [
        "OpenHermes-2.5-Mistral-7B",
        "openhermes-2.5-mistral-7b"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 39.23,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 44.55,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 29.93,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.29932,
          "normalized_0_100": 29.93,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1013.3655289755501,
          "normalized_0_100": 35.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 309,
          "lower": 971.1845649177549,
          "upper": 1055.546493033345,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1092.5414854203702,
          "normalized_0_100": 44.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 316,
          "lower": 1082.1910859525947,
          "upper": 1102.8918848881456,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-mini-4k-instruct",
      "display_name": "phi-3-mini-4k-instruct",
      "provider": "microsoft",
      "aliases": [
        "Phi-3-mini-4k-instruct",
        "phi-3-mini-4k-instruct"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 37.29,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 42.27,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 28.57,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.285714,
          "normalized_0_100": 28.57,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": 33.33,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1020.0625713990175,
          "normalized_0_100": 35.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 306,
          "lower": 1005.7457763768116,
          "upper": 1034.3793664212235,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1093.130700386923,
          "normalized_0_100": 44.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 309,
          "lower": 1081.5075064862588,
          "upper": 1104.7538942875876,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1073.3482720668137,
          "normalized_0_100": 42.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 325,
          "lower": 1067.0404825142182,
          "upper": 1079.656061619409,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zephyr-7b-beta",
      "display_name": "zephyr-7b-beta",
      "provider": null,
      "aliases": [
        "zephyr-7b-beta"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 36.76,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 41.8,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 27.94,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.058824,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.279412,
          "normalized_0_100": 27.94,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.117647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 959.0207549877539,
          "normalized_0_100": 28.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 321,
          "lower": 928.4936608017165,
          "upper": 989.5478491737912,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1069.4634568763818,
          "normalized_0_100": 41.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 326,
          "lower": 1060.7317473173866,
          "upper": 1078.1951664353767,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwq-32b-preview",
      "display_name": "qwq-32b-preview",
      "provider": "alibaba",
      "aliases": [
        "QwQ-32B-Preview",
        "qwq-32b-preview"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 36.18,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 52.79,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 7.12,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.106797,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.071198,
          "normalized_0_100": 7.12,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.466667,
          "normalized_0_100": 46.67,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.142396,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1216.9321714201058,
          "normalized_0_100": 59.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 236,
          "lower": 1176.3410720679813,
          "upper": 1257.52327077223,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1155.7393020635427,
          "normalized_0_100": 52.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 282,
          "lower": 1131.8977651827786,
          "upper": 1179.5808389443066,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1161.8440041524664,
          "normalized_0_100": 52.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 284,
          "lower": 1150.4802468514463,
          "upper": 1173.2077614534865,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zephyr-7b-alpha",
      "display_name": "zephyr-7b-alpha",
      "provider": null,
      "aliases": [
        "zephyr-7b-alpha"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 35.93,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 40.49,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 27.94,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.058824,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.279412,
          "normalized_0_100": 27.94,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.117647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1058.4107523288544,
          "normalized_0_100": 40.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 329,
          "lower": 1042.659002878123,
          "upper": 1074.1625017795861,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-mini-128k-instruct",
      "display_name": "phi-3-mini-128k-instruct",
      "provider": "microsoft",
      "aliases": [
        "Phi-3-mini-128k-instruct",
        "phi-3-mini-128k-instruct"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 34.9,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 39.54,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 26.79,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.071429,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.267857,
          "normalized_0_100": 26.79,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": 16.67,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.142857,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1015.3407336720429,
          "normalized_0_100": 35.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 308,
          "lower": 999.5573463632147,
          "upper": 1031.1241209808713,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1050.4309108746988,
          "normalized_0_100": 39.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 338,
          "lower": 1043.1936663303463,
          "upper": 1057.6681554190511,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-4b-chat",
      "display_name": "qwen1.5-4b-chat",
      "provider": "alibaba",
      "aliases": [
        "Qwen1.5-4B-Chat",
        "qwen1.5-4b-chat"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench",
        "lmarena"
      ],
      "composite": {
        "score_0_100": 32.5,
        "confidence": "moderate",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 33.22,
            "weight": 0.35
          },
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 31.25,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating",
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.3125,
          "normalized_0_100": 31.25,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1022.9150381735258,
          "normalized_0_100": 36.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 304,
          "lower": 1003.0819128505524,
          "upper": 1042.7481634964993,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 997.2754273574565,
          "normalized_0_100": 33.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 347,
          "lower": 988.0287654815439,
          "upper": 1006.5220892333689,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-7-sonnet-20250219-thinking-25k",
      "display_name": "claude-3-7-sonnet-20250219-thinking-25k",
      "provider": null,
      "aliases": [
        "claude-3-7-sonnet-20250219-thinking-25k"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 100,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "codegen3-5k-qwen2-5-72b-instruct-2-chk-50",
      "display_name": "codegen3_5k-qwen2.5-72b-instruct-2-chk-50",
      "provider": null,
      "aliases": [
        "codegen3_5k-qwen2.5-72b-instruct-2-chk-50"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 100,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "coding2-amcfull-apifull-mmlu12k-meta-llama-3-1-70b-instruct-chk-150",
      "display_name": "coding2-amcfull-apifull-mmlu12k-meta-llama-3.1-70b-instruct-chk-150",
      "provider": null,
      "aliases": [
        "coding2-amcfull-apifull-mmlu12k-meta-llama-3.1-70b-instruct-chk-150"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 100,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-r1-local",
      "display_name": "deepseek-r1-local",
      "provider": null,
      "aliases": [
        "deepseek-r1-local"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 100,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-r1-local-2",
      "display_name": "deepseek-r1-local-2",
      "provider": null,
      "aliases": [
        "deepseek-r1-local-2"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 100,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1",
      "display_name": "o1",
      "provider": null,
      "aliases": [
        "o1"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 100,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "smaug-qwen2-72b-instruct",
      "display_name": "Smaug-Qwen2-72B-Instruct",
      "provider": null,
      "aliases": [
        "Smaug-Qwen2-72B-Instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 100,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 100,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 1,
          "normalized_0_100": 100,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-llama-3-1-405b",
      "display_name": "meta-llama/Llama-3.1-405B",
      "provider": "meta-llama",
      "aliases": [
        "meta-llama/Llama-3.1-405B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 405.9,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 96.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 96.8,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 96.8,
          "normalized_0_100": 96.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 96.8,
          "normalized_0_100": 96.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-6-thinking",
      "display_name": "claude-opus-4-6-thinking",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-6-thinking"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 93,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 93,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1547.0689558885629,
          "normalized_0_100": 98.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 3,
          "lower": 1530.1863357380798,
          "upper": 1563.951576039046,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1537.6198080719232,
          "normalized_0_100": 97.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 2,
          "lower": 1529.6367147037488,
          "upper": 1545.6029014400976,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1500.0361791642981,
          "normalized_0_100": 93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 1,
          "lower": 1495.632594807143,
          "upper": 1504.439763521453,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-6",
      "display_name": "claude-opus-4-6",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-6"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 92.76,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 92.76,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1549.9727009856358,
          "normalized_0_100": 98.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 2,
          "lower": 1533.4406303420722,
          "upper": 1566.5047716291992,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1538.9138843742219,
          "normalized_0_100": 97.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 1,
          "lower": 1531.4032113754245,
          "upper": 1546.4245573730198,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1497.944030721996,
          "normalized_0_100": 92.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 2,
          "lower": 1493.646137178333,
          "upper": 1502.2419242656588,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "sonar-pro",
      "display_name": "sonar-pro",
      "provider": null,
      "aliases": [
        "sonar-pro"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 92.16,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 92.16,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.764706,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.921569,
          "normalized_0_100": 92.16,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.764706,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-3-5-flash",
      "display_name": "gemini-3.5-flash",
      "provider": "google",
      "aliases": [
        "gemini-3.5-flash"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 91.33,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 91.33,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1530.1650378318159,
          "normalized_0_100": 96.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 10,
          "lower": 1497.1052262681694,
          "upper": 1563.2248493954621,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1494.252065210401,
          "normalized_0_100": 92.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 12,
          "lower": 1479.4714823985933,
          "upper": 1509.0326480222088,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1485.9508700785311,
          "normalized_0_100": 91.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 3,
          "lower": 1478.2635901394822,
          "upper": 1493.6381500175803,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-7-thinking",
      "display_name": "claude-opus-4-7-thinking",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-7-thinking"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 91.32,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 91.32,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1542.7675591301,
          "normalized_0_100": 98.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 4,
          "lower": 1520.494505736256,
          "upper": 1565.0406125239435,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1525.5067084233472,
          "normalized_0_100": 96.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 3,
          "lower": 1514.98331575473,
          "upper": 1536.0301010919643,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1485.9017559127553,
          "normalized_0_100": 91.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 4,
          "lower": 1480.1490682638275,
          "upper": 1491.6544435616834,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "microsoft-phi-3-medium-4k-instruct",
      "display_name": "microsoft/Phi-3-medium-4k-instruct",
      "provider": "microsoft",
      "aliases": [
        "microsoft/Phi-3-medium-4k-instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 14,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 91,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 91,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 91,
          "normalized_0_100": 91,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 91,
          "normalized_0_100": 91,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-3-1-pro-preview",
      "display_name": "gemini-3.1-pro-preview",
      "provider": "google",
      "aliases": [
        "gemini-3.1-pro-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 90.96,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 90.96,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1535.9991766357118,
          "normalized_0_100": 97.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 7,
          "lower": 1521.0417960752757,
          "upper": 1550.9565571961477,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1488.3691465938523,
          "normalized_0_100": 91.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 16,
          "lower": 1481.3334429110184,
          "upper": 1495.4048502766861,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1482.8134614129021,
          "normalized_0_100": 90.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 5,
          "lower": 1478.5703109015403,
          "upper": 1487.056611924264,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-2",
      "display_name": "grok-2",
      "provider": null,
      "aliases": [
        "grok-2"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 90.59,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 90.59,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.843137,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.905882,
          "normalized_0_100": 90.59,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.529412,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-3-pro",
      "display_name": "gemini-3-pro",
      "provider": "google",
      "aliases": [
        "gemini-3-pro"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 90.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 90.57,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1527.8206982458025,
          "normalized_0_100": 96.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 11,
          "lower": 1514.2024711762047,
          "upper": 1541.4389253154006,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1482.7754349156664,
          "normalized_0_100": 90.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 22,
          "lower": 1475.6964873581387,
          "upper": 1489.8543824731944,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1479.5524915489023,
          "normalized_0_100": 90.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 6,
          "lower": 1475.7095020901666,
          "upper": 1483.3954810076377,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-7",
      "display_name": "claude-opus-4-7",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-7"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 90.51,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 90.51,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1537.1513304012228,
          "normalized_0_100": 97.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 5,
          "lower": 1513.3448622925102,
          "upper": 1560.9577985099354,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1520.9221740597434,
          "normalized_0_100": 95.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 4,
          "lower": 1510.8293216322259,
          "upper": 1531.0150264872607,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1479.0642983884832,
          "normalized_0_100": 90.51,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 7,
          "lower": 1473.3540125449458,
          "upper": 1484.7745842320203,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-7-max-preview",
      "display_name": "qwen3.7-max-preview",
      "provider": "alibaba",
      "aliases": [
        "qwen3.7-max-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 90.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 90.06,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1526.4722574429843,
          "normalized_0_100": 96.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 13,
          "lower": 1488.5892475953424,
          "upper": 1564.3552672906262,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1497.5206735483744,
          "normalized_0_100": 92.71,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 10,
          "lower": 1479.1969566404673,
          "upper": 1515.8443904562814,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1475.2630335106926,
          "normalized_0_100": 90.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 8,
          "lower": 1465.2922528502083,
          "upper": 1485.2338141711768,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "muse-spark",
      "display_name": "muse-spark",
      "provider": "meta",
      "aliases": [
        "muse-spark"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 90.03,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 90.03,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1515.1701951728262,
          "normalized_0_100": 94.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 16,
          "lower": 1491.341236192155,
          "upper": 1538.9991541534973,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1481.2724687591028,
          "normalized_0_100": 90.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 25,
          "lower": 1470.2342958305758,
          "upper": 1492.31064168763,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1475.0193908940637,
          "normalized_0_100": 90.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 9,
          "lower": 1468.9285871615539,
          "upper": 1481.1101946265735,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ernie-5-1",
      "display_name": "ernie-5.1",
      "provider": "baidu",
      "aliases": [
        "ernie-5.1"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 89.69,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 89.69,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1500.80620865845,
          "normalized_0_100": 93.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 26,
          "lower": 1472.7446288087529,
          "upper": 1528.8677885081472,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1491.1526618940175,
          "normalized_0_100": 91.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 14,
          "lower": 1478.6043207137925,
          "upper": 1503.7010030742424,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1472.164841199562,
          "normalized_0_100": 89.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 10,
          "lower": 1465.612979015554,
          "upper": 1478.71670338357,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-4-high",
      "display_name": "gpt-5.4-high",
      "provider": "openai",
      "aliases": [
        "gpt-5.4-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 89.59,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 89.59,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1518.0754247691325,
          "normalized_0_100": 95.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 15,
          "lower": 1499.6000060373913,
          "upper": 1536.5508435008737,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1500.7946133531827,
          "normalized_0_100": 93.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 6,
          "lower": 1492.102609455095,
          "upper": 1509.4866172512707,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1471.3478705351126,
          "normalized_0_100": 89.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 11,
          "lower": 1466.4054603116144,
          "upper": 1476.290280758611,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen2-72b",
      "display_name": "Qwen/Qwen2-72B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen2-72B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 72.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 89.5,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 89.5,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 89.5,
          "normalized_0_100": 89.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 89.5,
          "normalized_0_100": 89.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-5-max-preview",
      "display_name": "qwen3.5-max-preview",
      "provider": "alibaba",
      "aliases": [
        "qwen3.5-max-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 89.43,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 89.43,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1535.0241052408182,
          "normalized_0_100": 97.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 8,
          "lower": 1515.2473590304764,
          "upper": 1554.8008514511598,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1484.1747159498225,
          "normalized_0_100": 91.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 21,
          "lower": 1475.29341238017,
          "upper": 1493.0560195194753,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1470.0207538670704,
          "normalized_0_100": 89.43,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 12,
          "lower": 1464.8240225424984,
          "upper": 1475.2174851916423,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-7-sonnet-20250219-thinking-64k",
      "display_name": "claude-3-7-sonnet-20250219-thinking-64k",
      "provider": null,
      "aliases": [
        "claude-3-7-sonnet-20250219-thinking-64k"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 89.35,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 89.35,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.813636,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.893506,
          "normalized_0_100": 89.35,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.254545,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-5-1",
      "display_name": "glm-5.1",
      "provider": "zai",
      "aliases": [
        "glm-5.1"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 89.3,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 89.3,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1514.406837272058,
          "normalized_0_100": 94.71,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 17,
          "lower": 1491.403876265304,
          "upper": 1537.4097982788119,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1500.6618628453268,
          "normalized_0_100": 93.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 7,
          "lower": 1489.8654776448002,
          "upper": 1511.4582480458537,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1468.9005527192141,
          "normalized_0_100": 89.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 13,
          "lower": 1462.96832139287,
          "upper": 1474.8327840455584,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-5-high",
      "display_name": "gpt-5.5-high",
      "provider": "openai",
      "aliases": [
        "gpt-5.5-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 89.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 89.1,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1536.9267753393933,
          "normalized_0_100": 97.39,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 6,
          "lower": 1510.413419105139,
          "upper": 1563.4401315736477,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1489.6215334668536,
          "normalized_0_100": 91.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 15,
          "lower": 1477.8892024704078,
          "upper": 1501.3538644632997,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1467.1856086068572,
          "normalized_0_100": 89.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 14,
          "lower": 1460.9429100316825,
          "upper": 1473.4283071820319,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-3-flash",
      "display_name": "gemini-3-flash",
      "provider": "google",
      "aliases": [
        "gemini-3-flash"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 89,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 89,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1512.771173191495,
          "normalized_0_100": 94.52,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 18,
          "lower": 1497.3429690493388,
          "upper": 1528.1993773336515,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1463.5227107064904,
          "normalized_0_100": 88.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 38,
          "lower": 1455.5803392141602,
          "upper": 1471.4650821988207,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1466.366108874159,
          "normalized_0_100": 89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 15,
          "lower": 1461.9636756563998,
          "upper": 1470.768542091918,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-5-pro-exp-03-25",
      "display_name": "gemini-2.5-pro-exp-03-25",
      "provider": null,
      "aliases": [
        "gemini-2.5-pro-exp-03-25"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 88.95,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 88.95,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.834225,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.889483,
          "normalized_0_100": 88.95,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.668449,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-5",
      "display_name": "gpt-5.5",
      "provider": "openai",
      "aliases": [
        "gpt-5.5"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 88.83,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 88.83,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1558.867382617796,
          "normalized_0_100": 100,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 1,
          "lower": 1530.4520361795555,
          "upper": 1587.2827290560367,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1482.7421168475291,
          "normalized_0_100": 90.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 23,
          "lower": 1471.3386271165143,
          "upper": 1494.1456065785442,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1464.9340232596014,
          "normalized_0_100": 88.83,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 16,
          "lower": 1458.6934563827429,
          "upper": 1471.17459013646,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mimo-v2-5-pro",
      "display_name": "mimo-v2.5-pro",
      "provider": "xiaomi",
      "aliases": [
        "mimo-v2.5-pro"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 88.36,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 88.36,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1493.0485210003078,
          "normalized_0_100": 92.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 29,
          "lower": 1467.6654106046951,
          "upper": 1518.4316313959202,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1498.5259300023747,
          "normalized_0_100": 92.82,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 9,
          "lower": 1486.3185241724916,
          "upper": 1510.733335832258,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1460.9931907385273,
          "normalized_0_100": 88.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 17,
          "lower": 1454.6227382886548,
          "upper": 1467.3636431884001,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-5-pro",
      "display_name": "gemini-2.5-pro",
      "provider": "google",
      "aliases": [
        "gemini-2.5-pro"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 88.04,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 88.04,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1510.1986936118824,
          "normalized_0_100": 94.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 20,
          "lower": 1501.7474948642655,
          "upper": 1518.6498923594993,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1453.2206530610117,
          "normalized_0_100": 87.44,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 49,
          "lower": 1448.795530921893,
          "upper": 1457.6457752001304,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1458.3277289811588,
          "normalized_0_100": 88.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 18,
          "lower": 1455.8020790716587,
          "upper": 1460.8533788906586,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "kimi-k2-6",
      "display_name": "kimi-k2.6",
      "provider": "moonshot",
      "aliases": [
        "kimi-k2.6"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.85,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1526.7797799817426,
          "normalized_0_100": 96.18,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 12,
          "lower": 1500.4382797750875,
          "upper": 1553.1212801883978,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1493.8956286686955,
          "normalized_0_100": 92.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 13,
          "lower": 1482.379401972673,
          "upper": 1505.411855364718,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1456.7218117218372,
          "normalized_0_100": 87.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 19,
          "lower": 1450.6050220921168,
          "upper": 1462.8386013515578,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-4",
      "display_name": "gpt-5.4",
      "provider": "openai",
      "aliases": [
        "gpt-5.4"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.59,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.59,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1532.9202170472736,
          "normalized_0_100": 96.91,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 9,
          "lower": 1514.919801576482,
          "upper": 1550.9206325180653,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1482.195669563403,
          "normalized_0_100": 90.88,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 24,
          "lower": 1473.791585011226,
          "upper": 1490.59975411558,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1454.4749848762171,
          "normalized_0_100": 87.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 20,
          "lower": 1449.6179398047896,
          "upper": 1459.332029947645,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-20-beta-0309-reasoning",
      "display_name": "grok-4.20-beta-0309-reasoning",
      "provider": "xai",
      "aliases": [
        "grok-4.20-beta-0309-reasoning"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.47,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.47,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1490.5584570984017,
          "normalized_0_100": 91.88,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 31,
          "lower": 1472.1605435721026,
          "upper": 1508.956370624701,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1460.4601988904158,
          "normalized_0_100": 88.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 41,
          "lower": 1452.1031732365743,
          "upper": 1468.8172245442572,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1453.4862189459766,
          "normalized_0_100": 87.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 21,
          "lower": 1448.6807698754335,
          "upper": 1458.2916680165195,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-sonnet-4-6",
      "display_name": "claude-sonnet-4-6",
      "provider": "anthropic",
      "aliases": [
        "claude-sonnet-4-6"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.46,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.46,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1485.4541410907466,
          "normalized_0_100": 91.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 35,
          "lower": 1466.0222992672761,
          "upper": 1504.885982914217,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1500.1257178930357,
          "normalized_0_100": 93.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 8,
          "lower": 1491.645403012902,
          "upper": 1508.6060327731695,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1453.3970378440695,
          "normalized_0_100": 87.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 22,
          "lower": 1448.6183933674145,
          "upper": 1458.1756823207243,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-20-multi-agent-beta-0309",
      "display_name": "grok-4.20-multi-agent-beta-0309",
      "provider": "xai",
      "aliases": [
        "grok-4.20-multi-agent-beta-0309"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.28,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.28,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1478.2999312482214,
          "normalized_0_100": 90.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 42,
          "lower": 1460.33392522816,
          "upper": 1496.2659372682824,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1462.3735804396701,
          "normalized_0_100": 88.53,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 39,
          "lower": 1454.1866016958406,
          "upper": 1470.5605591834994,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1451.9094953701597,
          "normalized_0_100": 87.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 23,
          "lower": 1447.0986318082319,
          "upper": 1456.7203589320873,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "dola-seed-2-0-pro",
      "display_name": "dola-seed-2.0-pro",
      "provider": "bytedance",
      "aliases": [
        "dola-seed-2.0-pro"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.11,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.11,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1510.3415072976145,
          "normalized_0_100": 94.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 19,
          "lower": 1494.78123609101,
          "upper": 1525.9017785042188,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1473.9099781714463,
          "normalized_0_100": 89.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 29,
          "lower": 1466.6416604026583,
          "upper": 1481.1782959402344,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1450.4672361925354,
          "normalized_0_100": 87.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 24,
          "lower": 1446.1846413109802,
          "upper": 1454.7498310740905,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v4-pro-thinking",
      "display_name": "deepseek-v4-pro-thinking",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v4-pro-thinking"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.08,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.08,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1493.2354563815918,
          "normalized_0_100": 92.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 28,
          "lower": 1467.5477289843948,
          "upper": 1518.9231837787888,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1464.044637201838,
          "normalized_0_100": 88.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 37,
          "lower": 1452.471400859723,
          "upper": 1475.6178735439532,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1450.181793549347,
          "normalized_0_100": 87.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 25,
          "lower": 1443.8855081428396,
          "upper": 1456.4780789558545,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v4-pro",
      "display_name": "deepseek-v4-pro",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v4-pro"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 87.04,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 87.04,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1503.7570959197753,
          "normalized_0_100": 93.45,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 24,
          "lower": 1479.3867320700988,
          "upper": 1528.127459769452,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1468.520527229183,
          "normalized_0_100": 89.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 32,
          "lower": 1457.6280930141788,
          "upper": 1479.4129614441872,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1449.8624687454205,
          "normalized_0_100": 87.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 26,
          "lower": 1443.7288861080076,
          "upper": 1455.9960513828332,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-5-20251101",
      "display_name": "claude-opus-4-5-20251101",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-5-20251101"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.86,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.86,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1470.2009561526575,
          "normalized_0_100": 89.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 53,
          "lower": 1458.4511976146375,
          "upper": 1481.9507146906774,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1497.141659087758,
          "normalized_0_100": 92.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 11,
          "lower": 1491.208827373823,
          "upper": 1503.0744908016927,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1448.3878160002903,
          "normalized_0_100": 86.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 27,
          "lower": 1445.0423667750376,
          "upper": 1451.733265225543,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ernie-5-0-0110",
      "display_name": "ernie-5.0-0110",
      "provider": "baidu",
      "aliases": [
        "ernie-5.0-0110"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.85,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1504.2449231274763,
          "normalized_0_100": 93.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 23,
          "lower": 1488.337180973357,
          "upper": 1520.1526652815958,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1461.9374339686058,
          "normalized_0_100": 88.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 40,
          "lower": 1454.856181856234,
          "upper": 1469.018686080978,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1448.2802314736514,
          "normalized_0_100": 86.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 28,
          "lower": 1444.2146153961935,
          "upper": 1452.3458475511093,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-experimental-chat-26-02-10",
      "display_name": "amazon-nova-experimental-chat-26-02-10",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-experimental-chat-26-02-10"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.8,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1457.4451531541529,
          "normalized_0_100": 87.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 73,
          "lower": 1418.5414517568245,
          "upper": 1496.3488545514815,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1485.7369526922355,
          "normalized_0_100": 91.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 18,
          "lower": 1465.8131243684031,
          "upper": 1505.660781016068,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1447.8615877062714,
          "normalized_0_100": 86.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 29,
          "lower": 1438.0475410791923,
          "upper": 1457.6756343333504,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-3-flash-thinking-minimal",
      "display_name": "gemini-3-flash (thinking-minimal)",
      "provider": "google",
      "aliases": [
        "gemini-3-flash (thinking-minimal)"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.76,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.76,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1481.8016748246232,
          "normalized_0_100": 90.84,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 37,
          "lower": 1468.327400517653,
          "upper": 1495.2759491315935,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1443.503971830757,
          "normalized_0_100": 86.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 65,
          "lower": 1437.1941638817607,
          "upper": 1449.8137797797538,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1447.5642758709448,
          "normalized_0_100": 86.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 30,
          "lower": 1443.9784628851062,
          "upper": 1451.1500888567837,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-20-beta1",
      "display_name": "grok-4.20-beta1",
      "provider": "xai",
      "aliases": [
        "grok-4.20-beta1"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.72,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.72,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1480.3215737336102,
          "normalized_0_100": 90.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 39,
          "lower": 1461.9060241953305,
          "upper": 1498.73712327189,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1448.6400001505913,
          "normalized_0_100": 86.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 54,
          "lower": 1440.344460059756,
          "upper": 1456.9355402414265,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1447.207888089439,
          "normalized_0_100": 86.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 31,
          "lower": 1442.3886958972246,
          "upper": 1452.027080281653,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-5-20251101-thinking-32k",
      "display_name": "claude-opus-4-5-20251101-thinking-32k",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-5-20251101-thinking-32k"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.65,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.65,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1461.5768966728356,
          "normalized_0_100": 88.43,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 66,
          "lower": 1447.472328295132,
          "upper": 1475.681465050539,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1503.199846757709,
          "normalized_0_100": 93.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 5,
          "lower": 1495.8672009347817,
          "upper": 1510.5324925806367,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1446.563809637466,
          "normalized_0_100": 86.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 32,
          "lower": 1442.6598166044705,
          "upper": 1450.4678026704614,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-5",
      "display_name": "glm-5",
      "provider": "zai",
      "aliases": [
        "glm-5"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.48,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.48,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1520.3400382976972,
          "normalized_0_100": 95.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 14,
          "lower": 1501.6215833390181,
          "upper": 1539.0584932563763,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1456.3346234892083,
          "normalized_0_100": 87.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 44,
          "lower": 1447.9322490774236,
          "upper": 1464.7369979009932,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1445.2114298917868,
          "normalized_0_100": 86.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 33,
          "lower": 1440.52730807901,
          "upper": 1449.8955517045636,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "kimi-k2-5-thinking",
      "display_name": "kimi-k2.5-thinking",
      "provider": "moonshot",
      "aliases": [
        "kimi-k2.5-thinking"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.48,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.48,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1507.5808684580027,
          "normalized_0_100": 93.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 22,
          "lower": 1490.9876147536695,
          "upper": 1524.1741221623358,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1478.7158104217945,
          "normalized_0_100": 90.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 27,
          "lower": 1471.5949531467174,
          "upper": 1485.8366676968712,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1445.1866623008652,
          "normalized_0_100": 86.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 34,
          "lower": 1441.078863091551,
          "upper": 1449.2944615101794,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "microsoft-phi-3-5-mini-instruct",
      "display_name": "microsoft/Phi-3.5-mini-instruct",
      "provider": "microsoft",
      "aliases": [
        "microsoft/Phi-3.5-mini-instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 86.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 86.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 86.2,
          "normalized_0_100": 86.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 86.2,
          "normalized_0_100": 86.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-6-max-preview",
      "display_name": "qwen3.6-max-preview",
      "provider": "alibaba",
      "aliases": [
        "qwen3.6-max-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.16,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.16,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1490.4645326466123,
          "normalized_0_100": 91.87,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 32,
          "lower": 1455.4860372495275,
          "upper": 1525.4430280436968,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1458.7696121034994,
          "normalized_0_100": 88.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 42,
          "lower": 1441.8578563480148,
          "upper": 1475.6813678589838,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1442.4765748223074,
          "normalized_0_100": 86.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 35,
          "lower": 1433.6388341725794,
          "upper": 1451.3143154720353,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ernie-5-0-preview-1203",
      "display_name": "ernie-5.0-preview-1203",
      "provider": "baidu",
      "aliases": [
        "ernie-5.0-preview-1203"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.15,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.15,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1478.9154883441347,
          "normalized_0_100": 90.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 41,
          "lower": 1453.5028392008533,
          "upper": 1504.3281374874161,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1426.2471928627695,
          "normalized_0_100": 84.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 92,
          "lower": 1412.9805571068991,
          "upper": 1439.51382861864,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1442.4207716908795,
          "normalized_0_100": 86.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 36,
          "lower": 1435.9112170348194,
          "upper": 1448.9303263469394,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-4-31b",
      "display_name": "gemma-4-31b",
      "provider": "google",
      "aliases": [
        "gemma-4-31b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.1,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1473.79951721338,
          "normalized_0_100": 89.88,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 47,
          "lower": 1443.6127883343036,
          "upper": 1503.9862460924564,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1454.300614527398,
          "normalized_0_100": 87.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 46,
          "lower": 1438.9116807695743,
          "upper": 1469.689548285222,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1442.01495524381,
          "normalized_0_100": 86.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 37,
          "lower": 1434.4222773118363,
          "upper": 1449.6076331757836,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-1-high",
      "display_name": "gpt-5.1-high",
      "provider": "openai",
      "aliases": [
        "gpt-5.1-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.05,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.05,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1492.6114126990967,
          "normalized_0_100": 92.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 30,
          "lower": 1478.9567176278015,
          "upper": 1506.2661077703922,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1452.3956957044386,
          "normalized_0_100": 87.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 50,
          "lower": 1445.3289602083828,
          "upper": 1459.4624312004946,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1441.556924630315,
          "normalized_0_100": 86.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 38,
          "lower": 1437.7905839415087,
          "upper": 1445.3232653191214,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-5-397b-a17b",
      "display_name": "qwen3.5-397b-a17b",
      "provider": "alibaba",
      "aliases": [
        "qwen3.5-397b-a17b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 86.02,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 86.02,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1507.8461129937705,
          "normalized_0_100": 93.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 21,
          "lower": 1490.2069107338175,
          "upper": 1525.4853152537235,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1465.7320771442853,
          "normalized_0_100": 88.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 35,
          "lower": 1458.1757208620247,
          "upper": 1473.2884334265457,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1441.268692773968,
          "normalized_0_100": 86.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 39,
          "lower": 1436.8793911759035,
          "upper": 1445.6579943720328,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "internlm-internlm2-5-7b-chat",
      "display_name": "internlm/internlm2_5-7b-chat",
      "provider": "internlm",
      "aliases": [
        "internlm/internlm2_5-7b-chat"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 7.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 86,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 86,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 86,
          "normalized_0_100": 86,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 86,
          "normalized_0_100": 86,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-6",
      "display_name": "glm-4.6",
      "provider": "zai",
      "aliases": [
        "glm-4.6"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.92,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.92,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1499.1407931149586,
          "normalized_0_100": 92.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 27,
          "lower": 1484.4040169482607,
          "upper": 1513.8775692816562,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1449.8344355002844,
          "normalized_0_100": 87.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 53,
          "lower": 1442.5961332887437,
          "upper": 1457.0727377118253,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1440.4563623093425,
          "normalized_0_100": 85.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 40,
          "lower": 1436.543539772058,
          "upper": 1444.369184846627,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "datalab-to-chandra-ocr-2",
      "display_name": "datalab-to/chandra-ocr-2",
      "provider": "datalab-to",
      "aliases": [
        "datalab-to/chandra-ocr-2"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 5.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 85.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 85.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 85.9,
          "normalized_0_100": 85.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 85.9,
          "normalized_0_100": 85.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-2-chat-latest-20260210",
      "display_name": "gpt-5.2-chat-latest-20260210",
      "provider": "openai",
      "aliases": [
        "gpt-5.2-chat-latest-20260210"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.85,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1473.469820959958,
          "normalized_0_100": 89.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 48,
          "lower": 1457.071971056892,
          "upper": 1489.8676708630242,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1447.4137450186088,
          "normalized_0_100": 86.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 55,
          "lower": 1439.9106890428136,
          "upper": 1454.9168009944037,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1439.8708705492693,
          "normalized_0_100": 85.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 41,
          "lower": 1435.574839179126,
          "upper": 1444.1669019194126,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-1-thinking",
      "display_name": "grok-4.1-thinking",
      "provider": "xai",
      "aliases": [
        "grok-4.1-thinking"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.72,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.72,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1455.8564961110712,
          "normalized_0_100": 87.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 76,
          "lower": 1444.4547731095315,
          "upper": 1467.2582191126112,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1446.6352640549396,
          "normalized_0_100": 86.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 56,
          "lower": 1440.717075730879,
          "upper": 1452.553452379,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1438.816739956039,
          "normalized_0_100": 85.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 42,
          "lower": 1435.5112577036948,
          "upper": 1442.1222222083834,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-max-preview",
      "display_name": "qwen3-max-preview",
      "provider": "alibaba",
      "aliases": [
        "qwen3-max-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.72,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.72,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1485.8779773547412,
          "normalized_0_100": 91.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 34,
          "lower": 1470.2605111385412,
          "upper": 1501.495443570941,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1457.1110617348463,
          "normalized_0_100": 87.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 43,
          "lower": 1448.7534773102495,
          "upper": 1465.468646159443,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1438.8026489586575,
          "normalized_0_100": 85.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 43,
          "lower": 1434.3087674241995,
          "upper": 1443.2965304931158,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "microsoft-phi-3-mini-4k-instruct",
      "display_name": "microsoft/Phi-3-mini-4k-instruct",
      "provider": "microsoft",
      "aliases": [
        "microsoft/Phi-3-mini-4k-instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 85.7,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 85.7,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 85.7,
          "normalized_0_100": 85.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 85.7,
          "normalized_0_100": 85.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-6-plus",
      "display_name": "qwen3.6-plus",
      "provider": "alibaba",
      "aliases": [
        "qwen3.6-plus"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.61,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.61,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1469.335275570423,
          "normalized_0_100": 89.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 54,
          "lower": 1444.9730929566192,
          "upper": 1493.6974581842264,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1465.9145938144281,
          "normalized_0_100": 88.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 34,
          "lower": 1455.7025413917202,
          "upper": 1476.1266462371364,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1437.883346921693,
          "normalized_0_100": 85.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 44,
          "lower": 1432.1714514490136,
          "upper": 1443.5952423943725,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-1",
      "display_name": "grok-4.1",
      "provider": "xai",
      "aliases": [
        "grok-4.1"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.56,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.56,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1472.2438748747495,
          "normalized_0_100": 89.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 51,
          "lower": 1461.2132788790952,
          "upper": 1483.274470870404,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1443.718851425742,
          "normalized_0_100": 86.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 63,
          "lower": 1437.9041076545855,
          "upper": 1449.5335951968984,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1437.4066842338668,
          "normalized_0_100": 85.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 45,
          "lower": 1434.1320174351415,
          "upper": 1440.6813510325921,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-7",
      "display_name": "glm-4.7",
      "provider": "zai",
      "aliases": [
        "glm-4.7"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.41,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.41,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1483.8973635455243,
          "normalized_0_100": 91.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 36,
          "lower": 1461.7133951755718,
          "upper": 1506.0813319154768,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1456.250735237515,
          "normalized_0_100": 87.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 45,
          "lower": 1444.3076871019316,
          "upper": 1468.1937833730985,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1436.1982449472869,
          "normalized_0_100": 85.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 46,
          "lower": 1430.1243698039118,
          "upper": 1442.272120090662,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-sonnet-4-5-20250929",
      "display_name": "claude-sonnet-4-5-20250929",
      "provider": "anthropic",
      "aliases": [
        "claude-sonnet-4-5-20250929"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.37,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.37,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1457.8370715299422,
          "normalized_0_100": 87.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 72,
          "lower": 1446.6265442547558,
          "upper": 1469.0475988051287,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1484.6826421607968,
          "normalized_0_100": 91.18,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 20,
          "lower": 1479.1569820565353,
          "upper": 1490.2083022650584,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1435.8668449505824,
          "normalized_0_100": 85.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 47,
          "lower": 1432.8239059833638,
          "upper": 1438.909783917801,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-4-26b-a4b",
      "display_name": "gemma-4-26b-a4b",
      "provider": "google",
      "aliases": [
        "gemma-4-26b-a4b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.24,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.24,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1479.5713242576155,
          "normalized_0_100": 90.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 40,
          "lower": 1449.7250021723762,
          "upper": 1509.4176463428548,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1443.1952893439918,
          "normalized_0_100": 86.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 66,
          "lower": 1427.9004497306578,
          "upper": 1458.4901289573259,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1434.7677503205985,
          "normalized_0_100": 85.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 48,
          "lower": 1427.1340168584904,
          "upper": 1442.4014837827065,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mimo-v2-pro",
      "display_name": "mimo-v2-pro",
      "provider": "xiaomi",
      "aliases": [
        "mimo-v2-pro"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 85.17,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 85.17,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1455.5229792470795,
          "normalized_0_100": 87.71,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 77,
          "lower": 1435.3840160271666,
          "upper": 1475.6619424669927,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1476.5616419035796,
          "normalized_0_100": 90.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 28,
          "lower": 1467.9549101597224,
          "upper": 1485.1683736474367,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1434.1891392826226,
          "normalized_0_100": 85.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 49,
          "lower": 1429.1670144037323,
          "upper": 1439.2112641615126,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-r1-0528",
      "display_name": "deepseek-ai/DeepSeek-R1-0528",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-R1-0528"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 684.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 85,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 85,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 85,
          "normalized_0_100": 85,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 85,
          "normalized_0_100": 85,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-sonnet-4-5-20250929-thinking-32k",
      "display_name": "claude-sonnet-4-5-20250929-thinking-32k",
      "provider": "anthropic",
      "aliases": [
        "claude-sonnet-4-5-20250929-thinking-32k"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.73,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.73,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1457.0897035684097,
          "normalized_0_100": 87.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 75,
          "lower": 1446.0737077951235,
          "upper": 1468.1056993416958,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1486.9356675456759,
          "normalized_0_100": 91.45,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 17,
          "lower": 1481.5769996421434,
          "upper": 1492.294335449208,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1430.4530064377832,
          "normalized_0_100": 84.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 50,
          "lower": 1427.5250427490903,
          "upper": 1433.3809701264759,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-large-3",
      "display_name": "mistral-large-3",
      "provider": "mistral",
      "aliases": [
        "mistral-large-3"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.71,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.71,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1444.5430796168869,
          "normalized_0_100": 86.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 93,
          "lower": 1430.8161110392728,
          "upper": 1458.2700481945012,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1445.3248664078926,
          "normalized_0_100": 86.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 59,
          "lower": 1438.7990312300792,
          "upper": 1451.8507015857058,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1430.286011994361,
          "normalized_0_100": 84.71,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 51,
          "lower": 1426.6824217751096,
          "upper": 1433.8896022136123,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ernie-5-0-preview-1022",
      "display_name": "ernie-5.0-preview-1022",
      "provider": "baidu",
      "aliases": [
        "ernie-5.0-preview-1022"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.67,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.67,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1503.4967960216507,
          "normalized_0_100": 93.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 25,
          "lower": 1468.9694248297926,
          "upper": 1538.024167213509,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1412.9419109463104,
          "normalized_0_100": 82.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 106,
          "lower": 1394.2339504404456,
          "upper": 1431.6498714521754,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1429.9550444593667,
          "normalized_0_100": 84.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 52,
          "lower": 1421.1332008390464,
          "upper": 1438.7768880796873,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "chatgpt-4o-latest-20250326",
      "display_name": "chatgpt-4o-latest-20250326",
      "provider": "openai",
      "aliases": [
        "chatgpt-4o-latest-20250326"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.54,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.54,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1452.6107646355322,
          "normalized_0_100": 87.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 82,
          "lower": 1443.1847039926097,
          "upper": 1462.0368252784544,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1414.844235117393,
          "normalized_0_100": 82.87,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 102,
          "lower": 1409.7413243765054,
          "upper": 1419.9471458582807,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1428.820823877532,
          "normalized_0_100": 84.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 55,
          "lower": 1426.0233115568942,
          "upper": 1431.6183361981693,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v4-flash",
      "display_name": "deepseek-v4-flash",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v4-flash"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.54,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.54,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1474.823008029778,
          "normalized_0_100": 90.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 45,
          "lower": 1448.8590496805748,
          "upper": 1500.7869663789813,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1451.2043597115978,
          "normalized_0_100": 87.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 51,
          "lower": 1439.8070700558098,
          "upper": 1462.601649367386,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1428.8955857131884,
          "normalized_0_100": 84.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 53,
          "lower": 1422.6714095931288,
          "upper": 1435.1197618332478,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-5",
      "display_name": "glm-4.5",
      "provider": "zai",
      "aliases": [
        "glm-4.5"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.54,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.54,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1469.1407770211767,
          "normalized_0_100": 89.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 55,
          "lower": 1452.435913193442,
          "upper": 1485.8456408489114,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1433.3682593859398,
          "normalized_0_100": 85.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 82,
          "lower": 1424.6382886147596,
          "upper": 1442.09823015712,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1428.879138255765,
          "normalized_0_100": 84.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 54,
          "lower": 1424.0240949268746,
          "upper": 1433.7341815846553,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-r1-0528",
      "display_name": "deepseek-r1-0528",
      "provider": "deepseek",
      "aliases": [
        "deepseek-r1-0528"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.44,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.44,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1459.5425789193791,
          "normalized_0_100": 88.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 70,
          "lower": 1440.1664472553905,
          "upper": 1478.918710583368,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1427.0417710290835,
          "normalized_0_100": 84.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 91,
          "lower": 1415.718794653408,
          "upper": 1438.364747404759,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1427.9813831729944,
          "normalized_0_100": 84.44,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 56,
          "lower": 1422.3533798182157,
          "upper": 1433.609386527773,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-235b-a22b-thinking-2507",
      "display_name": "Qwen/Qwen3-235B-A22B-Thinking-2507",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-235B-A22B-Thinking-2507"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 84.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 84.4,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 84.4,
          "normalized_0_100": 84.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 84.4,
          "normalized_0_100": 84.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "longcat-flash-chat-2602-exp",
      "display_name": "longcat-flash-chat-2602-exp",
      "provider": "meituan",
      "aliases": [
        "longcat-flash-chat-2602-exp"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.2,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1466.3884723682063,
          "normalized_0_100": 89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 60,
          "lower": 1446.3198822465115,
          "upper": 1486.457062489901,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1473.3064613562333,
          "normalized_0_100": 89.83,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 31,
          "lower": 1464.2860512241257,
          "upper": 1482.3268714883409,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1425.9744011405674,
          "normalized_0_100": 84.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 57,
          "lower": 1420.7636159576055,
          "upper": 1431.185186323529,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v4-flash-thinking",
      "display_name": "deepseek-v4-flash-thinking",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v4-flash-thinking"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.19,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.19,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1481.1604848219706,
          "normalized_0_100": 90.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 38,
          "lower": 1456.2000169601022,
          "upper": 1506.120952683839,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1444.1890395733265,
          "normalized_0_100": 86.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 62,
          "lower": 1432.6546842937248,
          "upper": 1455.7233948529279,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1425.9315303444541,
          "normalized_0_100": 84.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 58,
          "lower": 1419.7144076456993,
          "upper": 1432.1486530432092,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-medium-2508",
      "display_name": "mistral-medium-2508",
      "provider": "mistral",
      "aliases": [
        "mistral-medium-2508"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.16,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.16,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1447.9584898779895,
          "normalized_0_100": 86.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 91,
          "lower": 1438.5375125080411,
          "upper": 1457.3794672479378,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1434.0559913760085,
          "normalized_0_100": 85.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 80,
          "lower": 1429.2562989262021,
          "upper": 1438.8556838258146,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1425.6948265732333,
          "normalized_0_100": 84.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 59,
          "lower": 1423.0264479613245,
          "upper": 1428.363205185142,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-3-preview-02-24",
      "display_name": "grok-3-preview-02-24",
      "provider": "xai",
      "aliases": [
        "grok-3-preview-02-24"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 84.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 84.06,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1447.89539524684,
          "normalized_0_100": 86.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 92,
          "lower": 1433.4671602877359,
          "upper": 1462.3236302059445,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1432.4475181931698,
          "normalized_0_100": 84.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 83,
          "lower": 1424.1876690337695,
          "upper": 1440.7073673525701,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1424.817180074934,
          "normalized_0_100": 84.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 60,
          "lower": 1420.5430792465133,
          "upper": 1429.0912809033546,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-2-exp-thinking",
      "display_name": "deepseek-v3.2-exp-thinking",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.2-exp-thinking"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.93,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.93,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1454.6999092515996,
          "normalized_0_100": 87.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 78,
          "lower": 1421.7791267273426,
          "upper": 1487.6206917758568,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1438.3669174856861,
          "normalized_0_100": 85.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 73,
          "lower": 1425.0445270988605,
          "upper": 1451.689307872512,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1423.72274349984,
          "normalized_0_100": 83.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 61,
          "lower": 1417.1851405047967,
          "upper": 1430.2603464948832,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-4bit",
      "display_name": "mlx-community/dots.mocr-4bit",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-4bit"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-5bit",
      "display_name": "mlx-community/dots.mocr-5bit",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-5bit"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-6bit",
      "display_name": "mlx-community/dots.mocr-6bit",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-6bit"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-8bit",
      "display_name": "mlx-community/dots.mocr-8bit",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-8bit"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-bf16",
      "display_name": "mlx-community/dots.mocr-bf16",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-bf16"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-mxfp4",
      "display_name": "mlx-community/dots.mocr-mxfp4",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-mxfp4"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-mxfp8",
      "display_name": "mlx-community/dots.mocr-mxfp8",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-mxfp8"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mlx-community-dots-mocr-nvfp4",
      "display_name": "mlx-community/dots.mocr-nvfp4",
      "provider": "mlx-community",
      "aliases": [
        "mlx-community/dots.mocr-nvfp4"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "rednote-hilab-dots-mocr",
      "display_name": "rednote-hilab/dots.mocr",
      "provider": "rednote-hilab",
      "aliases": [
        "rednote-hilab/dots.mocr"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.9,
          "normalized_0_100": 83.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-2",
      "display_name": "deepseek-v3.2",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.2"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.88,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.88,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1454.533863104622,
          "normalized_0_100": 87.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 80,
          "lower": 1441.4617920470332,
          "upper": 1467.6059341622108,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1446.566809289064,
          "normalized_0_100": 86.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 57,
          "lower": 1440.0034248131562,
          "upper": 1453.1301937649714,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1423.2868219538293,
          "normalized_0_100": 83.88,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 62,
          "lower": 1419.687131759044,
          "upper": 1426.8865121486147,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-2024-12-17-medium",
      "display_name": "o1-2024-12-17-medium",
      "provider": null,
      "aliases": [
        "o1-2024-12-17-medium"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 83.82,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 83.82,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.676471,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.838235,
          "normalized_0_100": 83.82,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-2-exp",
      "display_name": "deepseek-v3.2-exp",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.2-exp"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.8,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1463.1715858979896,
          "normalized_0_100": 88.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 64,
          "lower": 1440.040458442827,
          "upper": 1486.302713353152,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1433.8061308273332,
          "normalized_0_100": 85.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 81,
          "lower": 1422.1211379251433,
          "upper": 1445.491123729523,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1422.6010015909994,
          "normalized_0_100": 83.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 63,
          "lower": 1416.234430519316,
          "upper": 1428.967572662683,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-1",
      "display_name": "gpt-5.1",
      "provider": "openai",
      "aliases": [
        "gpt-5.1"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.77,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.77,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1474.4960564269436,
          "normalized_0_100": 89.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 46,
          "lower": 1460.9715690727141,
          "upper": 1488.020543781173,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1436.8234592729168,
          "normalized_0_100": 85.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 74,
          "lower": 1430.0733163655984,
          "upper": 1443.5736021802352,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1422.3958584295267,
          "normalized_0_100": 83.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 64,
          "lower": 1418.7514965756636,
          "upper": 1426.0402202833895,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "longcat-flash-chat",
      "display_name": "longcat-flash-chat",
      "provider": "meituan",
      "aliases": [
        "longcat-flash-chat"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.73,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.73,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1430.415434593083,
          "normalized_0_100": 84.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 100,
          "lower": 1407.7223366966625,
          "upper": 1453.1085324895037,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1467.909195820363,
          "normalized_0_100": 89.18,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 33,
          "lower": 1455.3772386002513,
          "upper": 1480.4411530404745,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1422.0089163667599,
          "normalized_0_100": 83.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 65,
          "lower": 1415.67211685603,
          "upper": 1428.3457158774897,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mimo-v2-5",
      "display_name": "mimo-v2.5",
      "provider": "xiaomi",
      "aliases": [
        "mimo-v2.5"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.72,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.72,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1488.3906388463745,
          "normalized_0_100": 91.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 33,
          "lower": 1460.3193928711753,
          "upper": 1516.4618848215737,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1465.2899083389416,
          "normalized_0_100": 88.87,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 36,
          "lower": 1453.8083173315606,
          "upper": 1476.7714993463226,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1421.988423671957,
          "normalized_0_100": 83.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 66,
          "lower": 1415.6750138793766,
          "upper": 1428.3018334645376,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-vl-235b-a22b-instruct",
      "display_name": "qwen3-vl-235b-a22b-instruct",
      "provider": "alibaba",
      "aliases": [
        "qwen3-vl-235b-a22b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.61,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.61,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1460.4064609651598,
          "normalized_0_100": 88.29,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 69,
          "lower": 1432.9447441161096,
          "upper": 1487.86817781421,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1440.421754420752,
          "normalized_0_100": 85.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 71,
          "lower": 1427.4642066084546,
          "upper": 1453.3793022330497,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1421.073479695986,
          "normalized_0_100": 83.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 67,
          "lower": 1414.6151885389388,
          "upper": 1427.5317708530329,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "kimi-k2-5-instant",
      "display_name": "kimi-k2.5-instant",
      "provider": "moonshot",
      "aliases": [
        "kimi-k2.5-instant"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.58,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.58,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1460.7575268360026,
          "normalized_0_100": 88.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 68,
          "lower": 1423.915148321106,
          "upper": 1497.5999053508992,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1484.9177032769737,
          "normalized_0_100": 91.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 19,
          "lower": 1471.1830215999223,
          "upper": 1498.652384954025,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1420.806127109504,
          "normalized_0_100": 83.58,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 68,
          "lower": 1414.2478784670936,
          "upper": 1427.3643757519144,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-experimental-chat-12-10",
      "display_name": "amazon-nova-experimental-chat-12-10",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-experimental-chat-12-10"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.49,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.49,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1448.6027181538564,
          "normalized_0_100": 86.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 90,
          "lower": 1413.3563553836022,
          "upper": 1483.8490809241105,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1429.2695733460432,
          "normalized_0_100": 84.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 88,
          "lower": 1407.9754811452308,
          "upper": 1450.5636655468556,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1420.0253245307638,
          "normalized_0_100": 83.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 69,
          "lower": 1410.4937221148055,
          "upper": 1429.5569269467221,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-3-1-flash-lite-preview",
      "display_name": "gemini-3.1-flash-lite-preview",
      "provider": "google",
      "aliases": [
        "gemini-3.1-flash-lite-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.45,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.45,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1454.1815214515323,
          "normalized_0_100": 87.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 81,
          "lower": 1438.4040276784783,
          "upper": 1469.9590152245864,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1402.5029678494896,
          "normalized_0_100": 81.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 118,
          "lower": 1394.9266688738542,
          "upper": 1410.0792668251247,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1419.6832006823297,
          "normalized_0_100": 83.45,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 70,
          "lower": 1415.247570721984,
          "upper": 1424.1188306426754,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-1-terminus-thinking",
      "display_name": "deepseek-v3.1-terminus-thinking",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.1-terminus-thinking"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.43,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.43,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1429.0716143848008,
          "normalized_0_100": 84.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 89,
          "lower": 1405.6711279413967,
          "upper": 1452.4721008282052,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1419.4869477958391,
          "normalized_0_100": 83.43,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 71,
          "lower": 1409.5261803659591,
          "upper": 1429.4477152257189,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-1",
      "display_name": "deepseek-v3.1",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.1"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.41,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.41,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1458.1279002669735,
          "normalized_0_100": 88.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 71,
          "lower": 1440.218666468689,
          "upper": 1476.0371340652582,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1414.1246683521993,
          "normalized_0_100": 82.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 103,
          "lower": 1402.635016784002,
          "upper": 1425.6143199203968,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1419.3839127215274,
          "normalized_0_100": 83.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 72,
          "lower": 1413.3953598064609,
          "upper": 1425.3724656365941,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-2-thinking",
      "display_name": "deepseek-v3.2-thinking",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.2-thinking"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.41,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.41,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1451.003967375469,
          "normalized_0_100": 87.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 85,
          "lower": 1437.0509543568232,
          "upper": 1464.956980394115,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1453.428488542361,
          "normalized_0_100": 87.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 48,
          "lower": 1446.4427315077432,
          "upper": 1460.414245576979,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1419.3137062460628,
          "normalized_0_100": 83.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 73,
          "lower": 1415.623002529801,
          "upper": 1423.0044099623244,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-235b-a22b-instruct-2507",
      "display_name": "qwen3-235b-a22b-instruct-2507",
      "provider": "alibaba",
      "aliases": [
        "qwen3-235b-a22b-instruct-2507"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.37,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.37,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1466.51844781176,
          "normalized_0_100": 89.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 59,
          "lower": 1457.286137500203,
          "upper": 1475.7507581233174,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1444.2568946588262,
          "normalized_0_100": 86.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 61,
          "lower": 1439.5225582661164,
          "upper": 1448.9912310515358,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1418.9968075677791,
          "normalized_0_100": 83.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 74,
          "lower": 1416.3722938074673,
          "upper": 1421.6213213280907,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-5-instant",
      "display_name": "gpt-5.5-instant",
      "provider": "openai",
      "aliases": [
        "gpt-5.5-instant"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.33,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.33,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1466.1812136896065,
          "normalized_0_100": 88.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 63,
          "lower": 1440.0086751879746,
          "upper": 1492.353752191238,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1435.631359572019,
          "normalized_0_100": 85.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 76,
          "lower": 1424.4012600012993,
          "upper": 1446.8614591427386,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1418.7063892175897,
          "normalized_0_100": 83.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 75,
          "lower": 1412.4914251728583,
          "upper": 1424.9213532623212,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-next-80b-a3b-instruct",
      "display_name": "qwen3-next-80b-a3b-instruct",
      "provider": "alibaba",
      "aliases": [
        "qwen3-next-80b-a3b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.32,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.32,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1473.129942757165,
          "normalized_0_100": 89.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 49,
          "lower": 1455.9687440928149,
          "upper": 1490.2911414215155,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1440.9683858484964,
          "normalized_0_100": 85.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 69,
          "lower": 1432.3315482894084,
          "upper": 1449.6052234075844,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1418.6139354309703,
          "normalized_0_100": 83.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 76,
          "lower": 1413.8766062830048,
          "upper": 1423.3512645789356,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-1-20250805-thinking-16k",
      "display_name": "claude-opus-4-1-20250805-thinking-16k",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-1-20250805-thinking-16k"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.3,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.3,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1419.6714354207588,
          "normalized_0_100": 83.45,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 115,
          "lower": 1407.6049692225033,
          "upper": 1431.7379016190143,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1480.6314423680687,
          "normalized_0_100": 90.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 26,
          "lower": 1474.1377811792918,
          "upper": 1487.1251035568457,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1418.4594719541371,
          "normalized_0_100": 83.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 77,
          "lower": 1414.9954702840025,
          "upper": 1421.923473624272,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-2-high",
      "display_name": "gpt-5.2-high",
      "provider": "openai",
      "aliases": [
        "gpt-5.2-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.3,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.3,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1457.0931049807793,
          "normalized_0_100": 87.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 74,
          "lower": 1443.6965236269577,
          "upper": 1470.489686334601,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1445.8330593629103,
          "normalized_0_100": 86.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 58,
          "lower": 1439.2867224657841,
          "upper": 1452.3793962600364,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1418.4501935575965,
          "normalized_0_100": 83.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 78,
          "lower": 1414.7206698784414,
          "upper": 1422.1797172367517,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-5-122b-a10b",
      "display_name": "qwen3.5-122b-a10b",
      "provider": "alibaba",
      "aliases": [
        "qwen3.5-122b-a10b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.27,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.27,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1466.3100719542988,
          "normalized_0_100": 88.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 61,
          "lower": 1449.2585193747786,
          "upper": 1483.361624533819,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1431.8711256185945,
          "normalized_0_100": 84.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 85,
          "lower": 1423.9440706561702,
          "upper": 1439.7981805810189,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1418.1783960443731,
          "normalized_0_100": 83.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 79,
          "lower": 1413.6238027249176,
          "upper": 1422.732989363829,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-1-terminus",
      "display_name": "deepseek-v3.1-terminus",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.1-terminus"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.22,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.22,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1406.636315235652,
          "normalized_0_100": 81.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 115,
          "lower": 1386.0662421090663,
          "upper": 1427.206388362238,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1417.7605621651157,
          "normalized_0_100": 83.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 80,
          "lower": 1408.1518880505214,
          "upper": 1427.3692362797103,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-1-20250805",
      "display_name": "claude-opus-4-1-20250805",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-1-20250805"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.2,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1424.8581017849447,
          "normalized_0_100": 84.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 108,
          "lower": 1414.7697443302977,
          "upper": 1434.9464592395916,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1473.4634617248357,
          "normalized_0_100": 89.84,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 30,
          "lower": 1468.026873282924,
          "upper": 1478.9000501667474,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1417.5492730039682,
          "normalized_0_100": 83.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 82,
          "lower": 1414.5436682053517,
          "upper": 1420.5548778025845,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-5-flash",
      "display_name": "gemini-2.5-flash",
      "provider": "google",
      "aliases": [
        "gemini-2.5-flash"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.2,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1452.2980950237827,
          "normalized_0_100": 87.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 83,
          "lower": 1444.051697784537,
          "upper": 1460.5444922630286,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1422.6809586554482,
          "normalized_0_100": 83.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 95,
          "lower": 1418.2666231470425,
          "upper": 1427.0952941638536,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1417.597486219442,
          "normalized_0_100": 83.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 81,
          "lower": 1415.1107530943152,
          "upper": 1420.0842193445687,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "lightonai-lightonocr-2-1b",
      "display_name": "lightonai/LightOnOCR-2-1B",
      "provider": "lightonai",
      "aliases": [
        "lightonai/LightOnOCR-2-1B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.2,
          "normalized_0_100": 83.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.2,
          "normalized_0_100": 83.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-30b-a3b-thinking-2507",
      "display_name": "Qwen/Qwen3-30B-A3B-Thinking-2507",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-30B-A3B-Thinking-2507"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 30.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.15,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.15,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 87.5,
          "normalized_0_100": 87.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 78.79,
          "normalized_0_100": 78.79,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.15,
          "normalized_0_100": 83.15,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-4-mini-high",
      "display_name": "gpt-5.4-mini-high",
      "provider": "openai",
      "aliases": [
        "gpt-5.4-mini-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.14,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.14,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1466.8759734816545,
          "normalized_0_100": 89.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 58,
          "lower": 1447.7656003456595,
          "upper": 1485.9863466176491,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1443.6016424789084,
          "normalized_0_100": 86.29,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 64,
          "lower": 1434.6727758502386,
          "upper": 1452.5305091075782,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1417.114280346801,
          "normalized_0_100": 83.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 84,
          "lower": 1412.1413656804145,
          "upper": 1422.0871950131875,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "datalab-to-chandra",
      "display_name": "datalab-to/chandra",
      "provider": "datalab-to",
      "aliases": [
        "datalab-to/chandra"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 8.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 83.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 83.1,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 83.1,
          "normalized_0_100": 83.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 83.1,
          "normalized_0_100": 83.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v3-1-thinking",
      "display_name": "deepseek-v3.1-thinking",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v3.1-thinking"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.1,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1470.3063669892535,
          "normalized_0_100": 89.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 52,
          "lower": 1449.6984584169747,
          "upper": 1490.9142755615321,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1418.607121928065,
          "normalized_0_100": 83.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 98,
          "lower": 1405.2218068923303,
          "upper": 1431.9924369637995,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1416.7252353451922,
          "normalized_0_100": 83.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 85,
          "lower": 1410.1493893992433,
          "upper": 1423.3010812911411,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-experimental-chat-11-10",
      "display_name": "amazon-nova-experimental-chat-11-10",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-experimental-chat-11-10"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.09,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.09,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1467.7867888365458,
          "normalized_0_100": 89.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 57,
          "lower": 1450.5346618817193,
          "upper": 1485.0389157913723,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1432.3890080526437,
          "normalized_0_100": 84.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 84,
          "lower": 1424.2731179113189,
          "upper": 1440.5048981939683,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1416.6263965271041,
          "normalized_0_100": 83.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 86,
          "lower": 1412.3128690011044,
          "upper": 1420.9399240531036,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-3",
      "display_name": "grok-4.3",
      "provider": "xai",
      "aliases": [
        "grok-4.3"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 83.07,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 83.07,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1448.9242199490552,
          "normalized_0_100": 86.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 88,
          "lower": 1420.8263705096138,
          "upper": 1477.022069388497,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1430.99502505931,
          "normalized_0_100": 84.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 86,
          "lower": 1419.052607205731,
          "upper": 1442.9374429128889,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1416.492106566486,
          "normalized_0_100": 83.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 87,
          "lower": 1410.045506268891,
          "upper": 1422.9387068640813,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "kimi-k2-thinking-turbo",
      "display_name": "kimi-k2-thinking-turbo",
      "provider": "moonshot",
      "aliases": [
        "kimi-k2-thinking-turbo"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.82,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.82,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1454.6315265583223,
          "normalized_0_100": 87.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 79,
          "lower": 1442.819253458603,
          "upper": 1466.4437996580414,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1453.826520860843,
          "normalized_0_100": 87.51,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 47,
          "lower": 1448.0362956582578,
          "upper": 1459.616746063428,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1414.3973236253216,
          "normalized_0_100": 82.82,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 88,
          "lower": 1411.17102147949,
          "upper": 1417.623625771153,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-235b-a22b-thinking-2507",
      "display_name": "qwen3-235b-a22b-thinking-2507",
      "provider": "alibaba",
      "aliases": [
        "qwen3-235b-a22b-thinking-2507"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.74,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.74,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1472.7342622700155,
          "normalized_0_100": 89.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 50,
          "lower": 1443.1818313720491,
          "upper": 1502.2866931679816,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1423.3902926264068,
          "normalized_0_100": 83.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 93,
          "lower": 1408.7539245588155,
          "upper": 1438.026660693998,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1413.759009439379,
          "normalized_0_100": 82.74,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 89,
          "lower": 1407.2735278110347,
          "upper": 1420.2444910677232,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-max-2025-09-23",
      "display_name": "qwen3-max-2025-09-23",
      "provider": "alibaba",
      "aliases": [
        "qwen3-max-2025-09-23"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.66,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.66,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1426.4937312671095,
          "normalized_0_100": 84.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 106,
          "lower": 1396.2668024022846,
          "upper": 1456.7206601319344,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1439.650593606543,
          "normalized_0_100": 85.82,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 72,
          "lower": 1426.7699986939858,
          "upper": 1452.5311885191002,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1413.0335119123847,
          "normalized_0_100": 82.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 90,
          "lower": 1406.6198945952324,
          "upper": 1419.4471292295368,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-2",
      "display_name": "gpt-5.2",
      "provider": "openai",
      "aliases": [
        "gpt-5.2"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.6,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1450.5324448990607,
          "normalized_0_100": 87.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 87,
          "lower": 1436.1302846558403,
          "upper": 1464.9346051422813,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1435.917098906103,
          "normalized_0_100": 85.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 75,
          "lower": 1429.250488948248,
          "upper": 1442.583708863958,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1412.5825599858226,
          "normalized_0_100": 82.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 91,
          "lower": 1408.80102896662,
          "upper": 1416.3640910050253,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mimo-v2-flash-non-thinking",
      "display_name": "mimo-v2-flash (non-thinking)",
      "provider": "xiaomi",
      "aliases": [
        "mimo-v2-flash (non-thinking)"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.55,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.55,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1462.6470345487637,
          "normalized_0_100": 88.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 65,
          "lower": 1449.0532223651512,
          "upper": 1476.2408467323762,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1440.6964863869744,
          "normalized_0_100": 85.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 70,
          "lower": 1434.3366216811326,
          "upper": 1447.056351092816,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1412.0852059916635,
          "normalized_0_100": 82.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 92,
          "lower": 1408.4569314118044,
          "upper": 1415.7134805715227,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "infly-infinity-parser-7b",
      "display_name": "infly/Infinity-Parser-7B",
      "provider": "infly",
      "aliases": [
        "infly/Infinity-Parser-7B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 8.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 82.5,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 82.5,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 82.5,
          "normalized_0_100": 82.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 82.5,
          "normalized_0_100": 82.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "allenai-olmocr-2-7b-1025-fp8",
      "display_name": "allenai/olmOCR-2-7B-1025-FP8",
      "provider": "allenai",
      "aliases": [
        "allenai/olmOCR-2-7B-1025-FP8"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 8.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 82.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 82.4,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 82.4,
          "normalized_0_100": 82.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 82.4,
          "normalized_0_100": 82.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "titanml-olmocr-2-7b-1025-fp8",
      "display_name": "TitanML/olmOCR-2-7B-1025-FP8",
      "provider": "TitanML",
      "aliases": [
        "TitanML/olmOCR-2-7B-1025-FP8"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 8.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 82.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 82.4,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 82.4,
          "normalized_0_100": 82.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 82.4,
          "normalized_0_100": 82.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-0709",
      "display_name": "grok-4-0709",
      "provider": "xai",
      "aliases": [
        "grok-4-0709"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.28,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.28,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1430.1898255303272,
          "normalized_0_100": 84.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 101,
          "lower": 1416.6570768049278,
          "upper": 1443.7225742557268,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1409.2529110216628,
          "normalized_0_100": 82.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 110,
          "lower": 1402.3809787505897,
          "upper": 1416.1248432927362,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1409.8588907260996,
          "normalized_0_100": 82.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 94,
          "lower": 1405.9808746362323,
          "upper": 1413.7369068159671,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-hy3-preview",
      "display_name": "hunyuan-hy3-preview",
      "provider": "tencent",
      "aliases": [
        "hunyuan-hy3-preview"
      ],
      "openness": null,
      "license": "tencent-hunyuan-community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.28,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.28,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1460.8567276900417,
          "normalized_0_100": 88.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 67,
          "lower": 1427.2219742151865,
          "upper": 1494.4914811648966,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1435.577583329013,
          "normalized_0_100": 85.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 77,
          "lower": 1420.4201786129684,
          "upper": 1450.7349880450574,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1409.8593409325342,
          "normalized_0_100": 82.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 93,
          "lower": 1401.8567029263404,
          "upper": 1417.861978938728,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o3-2025-04-16",
      "display_name": "o3-2025-04-16",
      "provider": "openai",
      "aliases": [
        "o3-2025-04-16"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.22,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.22,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1436.994866172027,
          "normalized_0_100": 85.51,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 95,
          "lower": 1425.6927130298072,
          "upper": 1448.2970193142467,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1408.0069054063622,
          "normalized_0_100": 82.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 111,
          "lower": 1401.8575587912148,
          "upper": 1414.1562520215098,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1409.3474156774691,
          "normalized_0_100": 82.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 95,
          "lower": 1405.768705218211,
          "upper": 1412.9261261367271,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-fast-chat",
      "display_name": "grok-4-fast-chat",
      "provider": "xai",
      "aliases": [
        "grok-4-fast-chat"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.19,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.19,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1475.410147939298,
          "normalized_0_100": 90.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 44,
          "lower": 1443.1683405859624,
          "upper": 1507.6519552926334,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1430.0408499957643,
          "normalized_0_100": 84.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 87,
          "lower": 1413.679770816737,
          "upper": 1446.4019291747918,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1409.0712886154263,
          "normalized_0_100": 82.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 96,
          "lower": 1401.511421520906,
          "upper": 1416.6311557099466,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-1-fast-reasoning",
      "display_name": "grok-4-1-fast-reasoning",
      "provider": "xai",
      "aliases": [
        "grok-4-1-fast-reasoning"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.15,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.15,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1440.5761291604642,
          "normalized_0_100": 85.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 94,
          "lower": 1428.5575586497998,
          "upper": 1452.5946996711286,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1412.8508806159284,
          "normalized_0_100": 82.64,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 107,
          "lower": 1406.8722710242214,
          "upper": 1418.8294902076354,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1408.78700667564,
          "normalized_0_100": 82.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 97,
          "lower": 1405.4559413686081,
          "upper": 1412.1180719826716,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-5-27b",
      "display_name": "qwen3.5-27b",
      "provider": "alibaba",
      "aliases": [
        "qwen3.5-27b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 82.11,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 82.11,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1476.907067832355,
          "normalized_0_100": 90.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 43,
          "lower": 1459.486338249811,
          "upper": 1494.3277974148994,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1420.1479674170823,
          "normalized_0_100": 83.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 97,
          "lower": 1412.1680487505669,
          "upper": 1428.1278860835978,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1408.463889320633,
          "normalized_0_100": 82.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 98,
          "lower": 1403.8751279944472,
          "upper": 1413.0526506468184,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-9b",
      "display_name": "Qwen/Qwen3.5-9B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-9B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 9.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 81.98,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 81.98,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 92.5,
          "normalized_0_100": 92.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 81.7,
          "normalized_0_100": 81.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 71.21,
          "normalized_0_100": 71.21,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 82.5,
          "normalized_0_100": 82.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 81.98,
          "normalized_0_100": 81.98,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-5-flash-preview-09-2025",
      "display_name": "gemini-2.5-flash-preview-09-2025",
      "provider": "google",
      "aliases": [
        "gemini-2.5-flash-preview-09-2025"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 81.94,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 81.94,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1450.8127591528896,
          "normalized_0_100": 87.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 86,
          "lower": 1435.1529969940455,
          "upper": 1466.4725213117338,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1401.7517488326848,
          "normalized_0_100": 81.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 119,
          "lower": 1394.2618526214155,
          "upper": 1409.2416450439543,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1406.9983171009912,
          "normalized_0_100": 81.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 99,
          "lower": 1402.9720007221397,
          "upper": 1411.0246334798428,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-vision-1-5-thinking",
      "display_name": "hunyuan-vision-1.5-thinking",
      "provider": "tencent",
      "aliases": [
        "hunyuan-vision-1.5-thinking"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 81.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 81.9,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1421.0283996436642,
          "normalized_0_100": 83.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 96,
          "lower": 1393.9900173507192,
          "upper": 1448.066781936609,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1406.6935225015209,
          "normalized_0_100": 81.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 100,
          "lower": 1394.4383186609696,
          "upper": 1418.948726342072,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-flash-exp",
      "display_name": "gemini-2.0-flash-exp",
      "provider": null,
      "aliases": [
        "gemini-2.0-flash-exp"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 81.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 81.85,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.745882,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.818487,
          "normalized_0_100": 81.85,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.364706,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-high",
      "display_name": "gpt-5-high",
      "provider": "openai",
      "aliases": [
        "gpt-5-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 81.74,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 81.74,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1424.2446893852623,
          "normalized_0_100": 83.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 109,
          "lower": 1408.7460704075918,
          "upper": 1439.743308362933,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1434.8465192973083,
          "normalized_0_100": 85.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 79,
          "lower": 1426.9305641995543,
          "upper": 1442.762474395062,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1405.2717220890688,
          "normalized_0_100": 81.74,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 101,
          "lower": 1400.8526189051224,
          "upper": 1409.6908252730152,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "step-3-5-flash",
      "display_name": "step-3.5-flash",
      "provider": "stepfun",
      "aliases": [
        "step-3.5-flash"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 81.59,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 81.59,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1448.7307650248656,
          "normalized_0_100": 86.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 89,
          "lower": 1432.0758658838213,
          "upper": 1465.38566416591,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1434.8853467085403,
          "normalized_0_100": 85.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 78,
          "lower": 1427.4997064731808,
          "upper": 1442.2709869438997,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1404.0190566617848,
          "normalized_0_100": 81.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 102,
          "lower": 1399.8536523299697,
          "upper": 1408.1844609936,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-chat",
      "display_name": "gpt-5-chat",
      "provider": "openai",
      "aliases": [
        "gpt-5-chat"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 81.54,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 81.54,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1420.3781425885563,
          "normalized_0_100": 83.53,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 112,
          "lower": 1405.5569816387913,
          "upper": 1435.1993035383216,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1400.4305285511573,
          "normalized_0_100": 81.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 121,
          "lower": 1392.5306204323724,
          "upper": 1408.3304366699422,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1403.6031376019764,
          "normalized_0_100": 81.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 103,
          "lower": 1399.3528646488953,
          "upper": 1407.8534105550575,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-vl-235b-a22b-thinking",
      "display_name": "qwen3-vl-235b-a22b-thinking",
      "provider": "alibaba",
      "aliases": [
        "qwen3-vl-235b-a22b-thinking"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 81.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 81.21,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1451.217423927318,
          "normalized_0_100": 87.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 84,
          "lower": 1416.4852757287688,
          "upper": 1485.9495721258672,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1427.9538554343756,
          "normalized_0_100": 84.43,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 90,
          "lower": 1413.5781543295595,
          "upper": 1442.3295565391916,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1400.8373115534307,
          "normalized_0_100": 81.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 104,
          "lower": 1394.0552798182378,
          "upper": 1407.6193432886237,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-v3-0324",
      "display_name": "deepseek-ai/DeepSeek-V3-0324",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-V3-0324"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 684.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 81.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 81.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 81.2,
          "normalized_0_100": 81.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 81.2,
          "normalized_0_100": 81.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-t1-20250711",
      "display_name": "hunyuan-t1-20250711",
      "provider": "tencent",
      "aliases": [
        "hunyuan-t1-20250711"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 81.08,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 81.08,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1431.2310464255227,
          "normalized_0_100": 84.82,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 98,
          "lower": 1394.3949376801797,
          "upper": 1468.0671551708656,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1387.6758636759746,
          "normalized_0_100": 79.64,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 134,
          "lower": 1367.862392336815,
          "upper": 1407.4893350151342,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1399.7879327141084,
          "normalized_0_100": 81.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 105,
          "lower": 1391.2306406844316,
          "upper": 1408.3452247437854,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-experimental-chat-26-01-10",
      "display_name": "amazon-nova-experimental-chat-26-01-10",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-experimental-chat-26-01-10"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.98,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.98,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1445.2500545227451,
          "normalized_0_100": 86.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 60,
          "lower": 1424.1812324102732,
          "upper": 1466.318876635217,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1398.9380573626486,
          "normalized_0_100": 80.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 106,
          "lower": 1389.1086528225426,
          "upper": 1408.7674619027543,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-5-flash",
      "display_name": "qwen3.5-flash",
      "provider": "alibaba",
      "aliases": [
        "qwen3.5-flash"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.8,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1466.2442222741706,
          "normalized_0_100": 88.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 62,
          "lower": 1448.8026786714613,
          "upper": 1483.68576587688,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1406.681397735576,
          "normalized_0_100": 81.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 114,
          "lower": 1398.8665844872294,
          "upper": 1414.4962109839223,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1397.4226510648593,
          "normalized_0_100": 80.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 107,
          "lower": 1392.8050920129244,
          "upper": 1402.040210116794,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-4-fast-reasoning",
      "display_name": "grok-4-fast-reasoning",
      "provider": "xai",
      "aliases": [
        "grok-4-fast-reasoning"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.79,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.79,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1432.8304679334187,
          "normalized_0_100": 85.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 97,
          "lower": 1410.3017443233853,
          "upper": 1455.3591915434524,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1418.3954937288456,
          "normalized_0_100": 83.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 99,
          "lower": 1409.0305850003224,
          "upper": 1427.7604024573686,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1397.3439844747693,
          "normalized_0_100": 80.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 108,
          "lower": 1392.338970157992,
          "upper": 1402.3489987915466,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-5-35b-a3b",
      "display_name": "qwen3.5-35b-a3b",
      "provider": "alibaba",
      "aliases": [
        "qwen3.5-35b-a3b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.77,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.77,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1468.579751588756,
          "normalized_0_100": 89.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 56,
          "lower": 1451.4568521939595,
          "upper": 1485.7026509835523,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1410.601863379516,
          "normalized_0_100": 82.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 109,
          "lower": 1402.7224021884938,
          "upper": 1418.4813245705384,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1397.133938802684,
          "normalized_0_100": 80.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 109,
          "lower": 1392.6265298210399,
          "upper": 1401.6413477843282,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimax-m2-7",
      "display_name": "minimax-m2.7",
      "provider": "minimax",
      "aliases": [
        "minimax-m2.7"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.6,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1420.088517826092,
          "normalized_0_100": 83.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 113,
          "lower": 1399.6889011616247,
          "upper": 1440.488134490559,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1442.8354114209644,
          "normalized_0_100": 86.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 67,
          "lower": 1433.9719667436318,
          "upper": 1451.6988560982973,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1395.7576623616537,
          "normalized_0_100": 80.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 110,
          "lower": 1390.519689968275,
          "upper": 1400.9956347550324,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-next-80b-a3b-instruct",
      "display_name": "Qwen/Qwen3-Next-80B-A3B-Instruct",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-Next-80B-A3B-Instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 81.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 80.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 80.6,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 80.6,
          "normalized_0_100": 80.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 80.6,
          "normalized_0_100": 80.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mimo-v2-flash-thinking",
      "display_name": "mimo-v2-flash (thinking)",
      "provider": "xiaomi",
      "aliases": [
        "mimo-v2-flash (thinking)"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.57,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1429.1351296488428,
          "normalized_0_100": 84.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 103,
          "lower": 1402.1221749816389,
          "upper": 1456.1480843160468,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1418.2321622233733,
          "normalized_0_100": 83.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 100,
          "lower": 1406.3806033593617,
          "upper": 1430.0837210873851,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1395.4340874012912,
          "normalized_0_100": 80.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 111,
          "lower": 1389.2227613351458,
          "upper": 1401.6454134674368,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-7-sonnet-20250219-base",
      "display_name": "claude-3-7-sonnet-20250219-base",
      "provider": null,
      "aliases": [
        "claude-3-7-sonnet-20250219-base"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 80.52,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 80.52,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.809091,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.805195,
          "normalized_0_100": 80.52,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.236364,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-experimental-chat-10-20",
      "display_name": "amazon-nova-experimental-chat-10-20",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-experimental-chat-10-20"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.44,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.44,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1407.9390438238167,
          "normalized_0_100": 82.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 121,
          "lower": 1384.3552140357206,
          "upper": 1431.522873611913,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1413.18288039121,
          "normalized_0_100": 82.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 104,
          "lower": 1400.9842551886975,
          "upper": 1425.3815055937223,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1394.3859578954618,
          "normalized_0_100": 80.44,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 112,
          "lower": 1388.267084348231,
          "upper": 1400.5048314426929,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mai-1-preview",
      "display_name": "mai-1-preview",
      "provider": "microsoft-ai",
      "aliases": [
        "mai-1-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.37,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.37,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1395.2798574146132,
          "normalized_0_100": 80.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 128,
          "lower": 1377.0155874768632,
          "upper": 1413.5441273523631,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1396.2966585884942,
          "normalized_0_100": 80.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 126,
          "lower": 1385.3408761330163,
          "upper": 1407.252441043972,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1393.8317193428493,
          "normalized_0_100": 80.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 113,
          "lower": 1388.4376170718522,
          "upper": 1399.2258216138462,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-235b-a22b-no-thinking",
      "display_name": "qwen3-235b-a22b-no-thinking",
      "provider": "alibaba",
      "aliases": [
        "qwen3-235b-a22b-no-thinking"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.36,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.36,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1426.3832306544243,
          "normalized_0_100": 84.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 107,
          "lower": 1412.8405102706934,
          "upper": 1439.9259510381555,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1398.8161574607557,
          "normalized_0_100": 80.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 123,
          "lower": 1391.3323714424153,
          "upper": 1406.2999434790963,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1393.673359228203,
          "normalized_0_100": 80.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 114,
          "lower": 1389.2066249247744,
          "upper": 1398.1400935316315,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimax-m2-1-preview",
      "display_name": "minimax-m2.1-preview",
      "provider": "minimax",
      "aliases": [
        "minimax-m2.1-preview"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 80.04,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 80.04,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1427.8402610162802,
          "normalized_0_100": 84.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 104,
          "lower": 1407.4780420199236,
          "upper": 1448.2024800126371,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1423.3677781644574,
          "normalized_0_100": 83.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 94,
          "lower": 1413.33314020705,
          "upper": 1433.402416121865,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1391.0496727538093,
          "normalized_0_100": 80.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 115,
          "lower": 1385.914681948988,
          "upper": 1396.184663558631,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "acm-rewrite-qwen2-72b-chat",
      "display_name": "acm_rewrite_qwen2-72B-Chat",
      "provider": null,
      "aliases": [
        "acm_rewrite_qwen2-72B-Chat"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 80,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 80,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.8,
          "normalized_0_100": 80,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "paddlepaddle-paddleocr-vl",
      "display_name": "PaddlePaddle/PaddleOCR-VL",
      "provider": "PaddlePaddle",
      "aliases": [
        "PaddlePaddle/PaddleOCR-VL"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 80,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 80,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 80,
          "normalized_0_100": 80,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 80,
          "normalized_0_100": 80,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-haiku-4-5-20251001",
      "display_name": "claude-haiku-4-5-20251001",
      "provider": "anthropic",
      "aliases": [
        "claude-haiku-4-5-20251001"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 79.97,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 79.97,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1411.4529248570168,
          "normalized_0_100": 82.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 119,
          "lower": 1400.6091607389133,
          "upper": 1422.2966889751203,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1450.451515522561,
          "normalized_0_100": 87.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 52,
          "lower": 1445.1691852003494,
          "upper": 1455.7338458447725,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1390.440597405122,
          "normalized_0_100": 79.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 116,
          "lower": 1387.5034567678827,
          "upper": 1393.377738042361,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen2-7b",
      "display_name": "Qwen/Qwen2-7B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen2-7B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 7.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 79.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 79.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 79.9,
          "normalized_0_100": 79.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 79.9,
          "normalized_0_100": 79.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "baidu-qianfan-ocr",
      "display_name": "baidu/Qianfan-OCR",
      "provider": "baidu",
      "aliases": [
        "baidu/Qianfan-OCR"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 4.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 79.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 79.8,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 79.8,
          "normalized_0_100": 79.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 79.8,
          "normalized_0_100": 79.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "internlm-internlm2-chat-20b",
      "display_name": "internlm/internlm2-chat-20b",
      "provider": "internlm",
      "aliases": [
        "internlm/internlm2-chat-20b"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 19.9,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 79.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 79.6,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 79.6,
          "normalized_0_100": 79.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 79.6,
          "normalized_0_100": 79.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-3-chat-latest",
      "display_name": "gpt-5.3-chat-latest",
      "provider": "openai",
      "aliases": [
        "gpt-5.3-chat-latest"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 79.54,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 79.54,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1421.1932747687545,
          "normalized_0_100": 83.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 110,
          "lower": 1404.1289753903786,
          "upper": 1438.2575741471305,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1403.9521081189862,
          "normalized_0_100": 81.58,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 117,
          "lower": 1396.0972954683525,
          "upper": 1411.8069207696199,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1386.8154439488135,
          "normalized_0_100": 79.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 117,
          "lower": 1382.2764157233087,
          "upper": 1391.3544721743183,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-v2",
      "display_name": "deepseek-ai/DeepSeek-V2",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-V2"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 235.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 79.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 79.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 79.2,
          "normalized_0_100": 79.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 79.2,
          "normalized_0_100": 79.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-30b-a3b-instruct-2507",
      "display_name": "qwen3-30b-a3b-instruct-2507",
      "provider": "alibaba",
      "aliases": [
        "qwen3-30b-a3b-instruct-2507"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 79.19,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 79.19,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1433.7400783266564,
          "normalized_0_100": 85.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 96,
          "lower": 1417.3539351591878,
          "upper": 1450.126221494125,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1417.9145915894235,
          "normalized_0_100": 83.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 101,
          "lower": 1409.123085857588,
          "upper": 1426.7060973212588,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1383.8329339329243,
          "normalized_0_100": 79.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 118,
          "lower": 1378.9826515509997,
          "upper": 1388.6832163148488,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "rednote-hilab-dots-ocr",
      "display_name": "rednote-hilab/dots.ocr",
      "provider": "rednote-hilab",
      "aliases": [
        "rednote-hilab/dots.ocr"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 79.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 79.1,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 79.1,
          "normalized_0_100": 79.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 79.1,
          "normalized_0_100": 79.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-5-air",
      "display_name": "glm-4.5-air",
      "provider": "zai",
      "aliases": [
        "glm-4.5-air"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 79.03,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 79.03,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1431.0720160385354,
          "normalized_0_100": 84.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 99,
          "lower": 1416.2572470406499,
          "upper": 1445.8867850364209,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1396.3650686520668,
          "normalized_0_100": 80.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 125,
          "lower": 1388.6639989185953,
          "upper": 1404.0661383855381,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1382.500005489786,
          "normalized_0_100": 79.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 119,
          "lower": 1378.2864216676176,
          "upper": 1386.7135893119546,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-1-2025-04-14",
      "display_name": "gpt-4.1-2025-04-14",
      "provider": "openai",
      "aliases": [
        "gpt-4.1-2025-04-14"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.95,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.95,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1373.3738019034636,
          "normalized_0_100": 77.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 148,
          "lower": 1361.2300253721858,
          "upper": 1385.5175784347412,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1390.3733814063214,
          "normalized_0_100": 79.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 131,
          "lower": 1383.8020329365856,
          "upper": 1396.9447298760572,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1381.873791450749,
          "normalized_0_100": 78.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 120,
          "lower": 1378.2017540793465,
          "upper": 1385.5458288221514,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "kimi-k2-0905-preview",
      "display_name": "kimi-k2-0905-preview",
      "provider": "moonshot",
      "aliases": [
        "kimi-k2-0905-preview"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.66,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.66,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1412.8250834444605,
          "normalized_0_100": 82.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 118,
          "lower": 1391.070197230766,
          "upper": 1434.5799696581548,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1399.7812221893225,
          "normalized_0_100": 81.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 122,
          "lower": 1387.3750711244575,
          "upper": 1412.1873732541874,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1379.4407091620265,
          "normalized_0_100": 78.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 121,
          "lower": 1372.9824665643832,
          "upper": 1385.8989517596694,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-5-flash-lite-preview-09-2025-no-thinking",
      "display_name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking",
      "provider": "google",
      "aliases": [
        "gemini-2.5-flash-lite-preview-09-2025-no-thinking"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.61,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.61,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1402.8478559443472,
          "normalized_0_100": 81.45,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 123,
          "lower": 1389.9057293854025,
          "upper": 1415.7899825032919,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1371.6842050070682,
          "normalized_0_100": 77.74,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 148,
          "lower": 1365.2180428150482,
          "upper": 1378.150367199088,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1379.013405459154,
          "normalized_0_100": 78.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 122,
          "lower": 1375.589381615004,
          "upper": 1382.437429303304,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nemotron-3-super-120b-a12b",
      "display_name": "nvidia-nemotron-3-super-120b-a12b",
      "provider": "nvidia",
      "aliases": [
        "nvidia-nemotron-3-super-120b-a12b"
      ],
      "openness": null,
      "license": "NVIDIA Open Model",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.53,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.53,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1429.9358511735807,
          "normalized_0_100": 84.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 102,
          "lower": 1401.4253146399126,
          "upper": 1458.4463877072487,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1404.245567915373,
          "normalized_0_100": 81.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 116,
          "lower": 1390.4627940926061,
          "upper": 1418.0283417381397,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1378.2808813243034,
          "normalized_0_100": 78.53,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 123,
          "lower": 1371.1112122582813,
          "upper": 1385.4505503903256,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-4-nano-high",
      "display_name": "gpt-5.4-nano-high",
      "provider": "openai",
      "aliases": [
        "gpt-5.4-nano-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.38,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.38,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1414.7226058047402,
          "normalized_0_100": 82.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 117,
          "lower": 1395.2195975795757,
          "upper": 1434.2256140299044,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1407.8677307778873,
          "normalized_0_100": 82.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 112,
          "lower": 1398.9552940137507,
          "upper": 1416.7801675420242,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1377.0228218897662,
          "normalized_0_100": 78.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 124,
          "lower": 1372.010580069793,
          "upper": 1382.0350637097395,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meituan-longcat-longcat-flash-lite",
      "display_name": "meituan-longcat/LongCat-Flash-Lite",
      "provider": "meituan-longcat",
      "aliases": [
        "meituan-longcat/LongCat-Flash-Lite"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 78.29,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 78.29,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 78.29,
          "normalized_0_100": 78.29,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 78.29,
          "normalized_0_100": 78.29,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-turbos-20250416",
      "display_name": "hunyuan-turbos-20250416",
      "provider": "tencent",
      "aliases": [
        "hunyuan-turbos-20250416"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.23,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.23,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1419.7800192497004,
          "normalized_0_100": 83.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 114,
          "lower": 1392.1469054727581,
          "upper": 1447.413133026643,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1361.3654285969424,
          "normalized_0_100": 76.51,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 164,
          "lower": 1347.7292617682851,
          "upper": 1375.0015954255996,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1375.780720904649,
          "normalized_0_100": 78.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 125,
          "lower": 1369.3654748362737,
          "upper": 1382.1959669730238,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-20250514-thinking-16k",
      "display_name": "claude-opus-4-20250514-thinking-16k",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-20250514-thinking-16k"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.17,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.17,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1388.3702083522132,
          "normalized_0_100": 79.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 134,
          "lower": 1374.386613021201,
          "upper": 1402.3538036832258,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1442.249564458795,
          "normalized_0_100": 86.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 68,
          "lower": 1434.4338284772411,
          "upper": 1450.065300440349,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1375.3194785774372,
          "normalized_0_100": 78.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 126,
          "lower": 1370.94243109351,
          "upper": 1379.696526061364,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-6v",
      "display_name": "glm-4.6v",
      "provider": "zai",
      "aliases": [
        "glm-4.6v"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 78.16,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 78.16,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1394.745755756926,
          "normalized_0_100": 80.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 127,
          "lower": 1369.8521307527321,
          "upper": 1419.63938076112,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1375.2431809758762,
          "normalized_0_100": 78.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 127,
          "lower": 1364.0160251228538,
          "upper": 1386.4703368288988,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-exp-1121",
      "display_name": "gemini-exp-1121",
      "provider": null,
      "aliases": [
        "gemini-exp-1121"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 78.15,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 78.15,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.823529,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.781513,
          "normalized_0_100": 78.15,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-mini-high",
      "display_name": "gpt-5-mini-high",
      "provider": "openai",
      "aliases": [
        "gpt-5-mini-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.97,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.97,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1389.3419134439628,
          "normalized_0_100": 79.84,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 132,
          "lower": 1372.7738613442837,
          "upper": 1405.9099655436416,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1406.9178559418021,
          "normalized_0_100": 81.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 113,
          "lower": 1398.6207644943613,
          "upper": 1415.214947389243,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1373.6171614504983,
          "normalized_0_100": 77.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 129,
          "lower": 1369.0817325491892,
          "upper": 1378.1525903518075,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-r1",
      "display_name": "deepseek-ai/DeepSeek-R1",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-R1"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 684.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 77.75,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 77.75,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 71.5,
          "normalized_0_100": 71.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 84,
          "normalized_0_100": 84,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 77.75,
          "normalized_0_100": 77.75,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "jdopensource-joyai-llm-flash",
      "display_name": "jdopensource/JoyAI-LLM-Flash",
      "provider": "jdopensource",
      "aliases": [
        "jdopensource/JoyAI-LLM-Flash"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 49.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 77.72,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 77.72,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 74.43,
          "normalized_0_100": 74.43,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 81.02,
          "normalized_0_100": 81.02,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 77.72,
          "normalized_0_100": 77.72,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-4b",
      "display_name": "Qwen/Qwen3.5-4B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-4B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 4.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 77.65,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 77.65,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 76.2,
          "normalized_0_100": 76.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 79.1,
          "normalized_0_100": 79.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 77.65,
          "normalized_0_100": 77.65,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "kimi-k2-0711-preview",
      "display_name": "kimi-k2-0711-preview",
      "provider": "moonshot",
      "aliases": [
        "kimi-k2-0711-preview"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.63,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.63,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1398.136900445792,
          "normalized_0_100": 80.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 127,
          "lower": 1382.6977048138137,
          "upper": 1413.5760960777702,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1377.8204872652132,
          "normalized_0_100": 78.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 142,
          "lower": 1369.4032517081587,
          "upper": 1386.2377228222674,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1370.7681143933428,
          "normalized_0_100": 77.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 131,
          "lower": 1365.9377347172465,
          "upper": 1375.598494069439,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-medium-2505",
      "display_name": "mistral-medium-2505",
      "provider": "mistral",
      "aliases": [
        "mistral-medium-2505"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.46,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.46,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1373.7650008291444,
          "normalized_0_100": 77.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 147,
          "lower": 1359.2425669911145,
          "upper": 1388.2874346671742,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1386.3355129053562,
          "normalized_0_100": 79.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 135,
          "lower": 1378.3792933692193,
          "upper": 1394.291732441493,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1369.2977101384126,
          "normalized_0_100": 77.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 132,
          "lower": 1364.64056577546,
          "upper": 1373.9548545013654,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-5-flash-lite-preview-06-17-thinking",
      "display_name": "gemini-2.5-flash-lite-preview-06-17-thinking",
      "provider": "google",
      "aliases": [
        "gemini-2.5-flash-lite-preview-06-17-thinking"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.37,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.37,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1401.7830601433866,
          "normalized_0_100": 81.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 124,
          "lower": 1387.3644264297504,
          "upper": 1416.2016938570227,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1373.9510078814328,
          "normalized_0_100": 78.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 145,
          "lower": 1366.0618471829791,
          "upper": 1381.8401685798863,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1368.5354202787912,
          "normalized_0_100": 77.37,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 133,
          "lower": 1364.0707838880603,
          "upper": 1373.0000566695223,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-next-80b-a3b-thinking",
      "display_name": "qwen3-next-80b-a3b-thinking",
      "provider": "alibaba",
      "aliases": [
        "qwen3-next-80b-a3b-thinking"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.26,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.26,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1416.6267050020397,
          "normalized_0_100": 83.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 116,
          "lower": 1394.684794406391,
          "upper": 1438.5686155976882,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1391.573240104213,
          "normalized_0_100": 80.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 129,
          "lower": 1380.1318201385345,
          "upper": 1403.0146600698913,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1367.605546038248,
          "normalized_0_100": 77.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 134,
          "lower": 1361.7872336693795,
          "upper": 1373.4238584071165,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-3-mini-high",
      "display_name": "grok-3-mini-high",
      "provider": "xai",
      "aliases": [
        "grok-3-mini-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.14,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.14,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1378.6090724456092,
          "normalized_0_100": 78.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 141,
          "lower": 1356.3418609455102,
          "upper": 1400.8762839457079,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1375.834610463099,
          "normalized_0_100": 78.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 143,
          "lower": 1365.6299950008556,
          "upper": 1386.039225925342,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1366.5900930898047,
          "normalized_0_100": 77.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 135,
          "lower": 1361.28133045602,
          "upper": 1371.8988557235893,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-2024-12-17",
      "display_name": "o1-2024-12-17",
      "provider": "openai",
      "aliases": [
        "o1-2024-12-17"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.05,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.05,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1393.6444890697983,
          "normalized_0_100": 80.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 129,
          "lower": 1379.3715322377882,
          "upper": 1407.9174459018084,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1367.4367047255528,
          "normalized_0_100": 77.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 153,
          "lower": 1357.9221785436434,
          "upper": 1376.9512309074623,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1365.840507088446,
          "normalized_0_100": 77.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 137,
          "lower": 1361.396248278902,
          "upper": 1370.2847658979895,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-235b-a22b",
      "display_name": "qwen3-235b-a22b",
      "provider": "alibaba",
      "aliases": [
        "qwen3-235b-a22b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77.04,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77.04,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1379.881951167851,
          "normalized_0_100": 78.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 140,
          "lower": 1363.4262587894095,
          "upper": 1396.3376435462924,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1385.7615849157626,
          "normalized_0_100": 79.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 136,
          "lower": 1376.8156905777273,
          "upper": 1394.7074792537978,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1365.7632514118282,
          "normalized_0_100": 77.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 138,
          "lower": 1361.0878551502776,
          "upper": 1370.4386476733785,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-oss-120b",
      "display_name": "gpt-oss-120b",
      "provider": "openai",
      "aliases": [
        "gpt-oss-120b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 77,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 77,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1377.0600322612686,
          "normalized_0_100": 78.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 145,
          "lower": 1362.2315493679444,
          "upper": 1391.8885151545928,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1380.291861985014,
          "normalized_0_100": 78.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 138,
          "lower": 1372.5968073449376,
          "upper": 1387.9869166250905,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1365.4669112823094,
          "normalized_0_100": 77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 139,
          "lower": 1361.139784138134,
          "upper": 1369.794038426485,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-opus-4-20250514",
      "display_name": "claude-opus-4-20250514",
      "provider": "anthropic",
      "aliases": [
        "claude-opus-4-20250514"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 76.86,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 76.86,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1387.097321168999,
          "normalized_0_100": 79.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 136,
          "lower": 1374.8029727690573,
          "upper": 1399.3916695689406,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1401.727481816318,
          "normalized_0_100": 81.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 120,
          "lower": 1394.506702988518,
          "upper": 1408.9482606441181,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1364.2812256044053,
          "normalized_0_100": 76.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 140,
          "lower": 1359.9981188907623,
          "upper": 1368.5643323180486,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "amazon-nova-experimental-chat-10-09",
      "display_name": "amazon-nova-experimental-chat-10-09",
      "provider": "amazon",
      "aliases": [
        "amazon-nova-experimental-chat-10-09"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 76.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 76.85,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1365.5746774293368,
          "normalized_0_100": 77.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 158,
          "lower": 1341.4673391359238,
          "upper": 1389.6820157227496,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1364.16566795767,
          "normalized_0_100": 76.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 141,
          "lower": 1353.3812718126087,
          "upper": 1374.9500641027307,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-v3",
      "display_name": "deepseek-ai/DeepSeek-V3",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-V3"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 684.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 76.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 76.85,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 89.3,
          "normalized_0_100": 89.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 64.4,
          "normalized_0_100": 64.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 76.85,
          "normalized_0_100": 76.85,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimax-m2-5",
      "display_name": "minimax-m2.5",
      "provider": "minimax",
      "aliases": [
        "minimax-m2.5"
      ],
      "openness": null,
      "license": "Modified MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 76.73,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 76.73,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1393.2076519548275,
          "normalized_0_100": 80.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 130,
          "lower": 1377.073856977545,
          "upper": 1409.34144693211,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1388.6475782949212,
          "normalized_0_100": 79.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 132,
          "lower": 1381.1238996187442,
          "upper": 1396.171256971098,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1363.1900497704605,
          "normalized_0_100": 76.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 142,
          "lower": 1358.8886328286203,
          "upper": 1367.4914667123005,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nova-2-lite",
      "display_name": "nova-2-lite",
      "provider": "amazon",
      "aliases": [
        "nova-2-lite"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 76.68,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 76.68,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1369.849601677453,
          "normalized_0_100": 77.52,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 149,
          "lower": 1347.4503827982608,
          "upper": 1392.2488205566451,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1388.3231489625846,
          "normalized_0_100": 79.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 133,
          "lower": 1376.5989992410173,
          "upper": 1400.0472986841517,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1362.7273829992312,
          "normalized_0_100": 76.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 143,
          "lower": 1356.6608497863394,
          "upper": 1368.793916212123,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ling-flash-2-0",
      "display_name": "ling-flash-2.0",
      "provider": "ant-group",
      "aliases": [
        "ling-flash-2.0"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 76.67,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 76.67,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1410.7124091778605,
          "normalized_0_100": 82.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 120,
          "lower": 1377.8087514372864,
          "upper": 1443.6160669184346,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1390.9472060035303,
          "normalized_0_100": 80.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 130,
          "lower": 1376.2903584047306,
          "upper": 1405.60405360233,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1362.6687591242746,
          "normalized_0_100": 76.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 144,
          "lower": 1355.4754988793138,
          "upper": 1369.8620193692354,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-3-mini-beta",
      "display_name": "grok-3-mini-beta",
      "provider": "xai",
      "aliases": [
        "grok-3-mini-beta"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 76.66,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 76.66,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1374.7794830917885,
          "normalized_0_100": 78.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 146,
          "lower": 1356.550309963523,
          "upper": 1393.0086562200538,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1366.028445182452,
          "normalized_0_100": 77.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 156,
          "lower": 1357.0117005772238,
          "upper": 1375.0451897876803,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1362.5733416594662,
          "normalized_0_100": 76.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 145,
          "lower": 1357.597304293633,
          "upper": 1367.5493790252992,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-ocr-2",
      "display_name": "deepseek-ai/DeepSeek-OCR-2",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-OCR-2"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3.4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 76.3,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 76.3,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 76.3,
          "normalized_0_100": 76.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 76.3,
          "normalized_0_100": 76.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "lightonai-lightonocr-1b-1025",
      "display_name": "lightonai/LightOnOCR-1B-1025",
      "provider": "lightonai",
      "aliases": [
        "lightonai/LightOnOCR-1B-1025"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 76.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 76.1,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 76.1,
          "normalized_0_100": 76.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 76.1,
          "normalized_0_100": 76.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mercury-2",
      "display_name": "mercury-2",
      "provider": "inception-ai",
      "aliases": [
        "mercury-2"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.99,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.99,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1391.9646806317164,
          "normalized_0_100": 80.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 128,
          "lower": 1371.4194581831189,
          "upper": 1412.5099030803142,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1356.9624380818173,
          "normalized_0_100": 75.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 147,
          "lower": 1346.4996495540336,
          "upper": 1367.4252266096012,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "intellect-3",
      "display_name": "intellect-3",
      "provider": null,
      "aliases": [
        "intellect-3"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.92,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.92,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1357.5891758969065,
          "normalized_0_100": 76.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 153,
          "lower": 1322.8225522341181,
          "upper": 1392.3557995596946,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1368.488127301595,
          "normalized_0_100": 77.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 151,
          "lower": 1349.8221700355007,
          "upper": 1387.154084567689,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1356.383028417917,
          "normalized_0_100": 75.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 148,
          "lower": 1348.0891591926961,
          "upper": 1364.676897643138,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-coder-480b-a35b-instruct",
      "display_name": "qwen3-coder-480b-a35b-instruct",
      "provider": "alibaba",
      "aliases": [
        "qwen3-coder-480b-a35b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.91,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.91,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1356.533029402845,
          "normalized_0_100": 75.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 155,
          "lower": 1339.7922388249453,
          "upper": 1373.2738199807445,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1412.4721772727014,
          "normalized_0_100": 82.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 108,
          "lower": 1403.5155479867694,
          "upper": 1421.4288065586336,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1356.294777428939,
          "normalized_0_100": 75.91,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 149,
          "lower": 1351.3355413069446,
          "upper": 1361.2540135509332,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-ocr",
      "display_name": "deepseek-ai/DeepSeek-OCR",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-OCR"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 75.7,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 75.7,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 75.7,
          "normalized_0_100": 75.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 75.7,
          "normalized_0_100": 75.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-7-flash",
      "display_name": "glm-4.7-flash",
      "provider": "zai",
      "aliases": [
        "glm-4.7-flash"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.6,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1426.702831060107,
          "normalized_0_100": 84.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 105,
          "lower": 1398.9812307216052,
          "upper": 1454.4244313986087,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1383.607911756012,
          "normalized_0_100": 79.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 137,
          "lower": 1372.4440471047033,
          "upper": 1394.7717764073207,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1353.6764275688502,
          "normalized_0_100": 75.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 151,
          "lower": 1347.9013264620985,
          "upper": 1359.451528675602,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-preview",
      "display_name": "o1-preview",
      "provider": "openai",
      "aliases": [
        "o1-preview"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.5,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.5,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1324.5717602516609,
          "normalized_0_100": 72.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 172,
          "lower": 1312.2050189053703,
          "upper": 1336.9385015979515,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1367.1485904917022,
          "normalized_0_100": 77.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 154,
          "lower": 1357.8520594144231,
          "upper": 1376.445121568981,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1352.8218650815286,
          "normalized_0_100": 75.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 152,
          "lower": 1347.8109427589534,
          "upper": 1357.8327874041036,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o4-mini-2025-04-16",
      "display_name": "o4-mini-2025-04-16",
      "provider": "openai",
      "aliases": [
        "o4-mini-2025-04-16"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.49,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.49,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1350.4893339743667,
          "normalized_0_100": 75.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 159,
          "lower": 1337.9584087249864,
          "upper": 1363.020259223747,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1368.685287198668,
          "normalized_0_100": 77.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 150,
          "lower": 1361.8578942240342,
          "upper": 1375.512680173302,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1352.7146948346901,
          "normalized_0_100": 75.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 153,
          "lower": 1348.7844224774371,
          "upper": 1356.6449671919431,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "arcee-ai-trinity-large-preview",
      "display_name": "arcee-ai/Trinity-Large-Preview",
      "provider": "arcee-ai",
      "aliases": [
        "arcee-ai/Trinity-Large-Preview"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 398.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 75.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 75.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ares-realm-studios-trinity-large-preview-arstest",
      "display_name": "Ares-Realm-Studios/Trinity-Large-Preview-ARSTEST",
      "provider": "Ares-Realm-Studios",
      "aliases": [
        "Ares-Realm-Studios/Trinity-Large-Preview-ARSTEST"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 398.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 75.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 75.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "opendatalab-mineru2-5-2509-1-2b",
      "display_name": "opendatalab/MinerU2.5-2509-1.2B",
      "provider": "opendatalab",
      "aliases": [
        "opendatalab/MinerU2.5-2509-1.2B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.2,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 75.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 75.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zai-org-glm-ocr",
      "display_name": "zai-org/GLM-OCR",
      "provider": "zai-org",
      "aliases": [
        "zai-org/GLM-OCR"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 75.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 75.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "step-3",
      "display_name": "step-3",
      "provider": "stepfun",
      "aliases": [
        "step-3"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.09,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.09,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1398.389569285419,
          "normalized_0_100": 80.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 126,
          "lower": 1361.2503737977922,
          "upper": 1435.5287647730456,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1365.3845274999373,
          "normalized_0_100": 76.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 159,
          "lower": 1348.9104137101413,
          "upper": 1381.8586412897334,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1349.4144470763931,
          "normalized_0_100": 75.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 154,
          "lower": 1342.0843414930405,
          "upper": 1356.7445526597462,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nemotron-3-nano-30b-a3b-bf16",
      "display_name": "nvidia-nemotron-3-nano-30b-a3b-bf16",
      "provider": "nvidia",
      "aliases": [
        "nvidia-nemotron-3-nano-30b-a3b-bf16"
      ],
      "openness": null,
      "license": "NVIDIA Open Model",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 75.08,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 75.08,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1392.7162877869807,
          "normalized_0_100": 80.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 131,
          "lower": 1372.1817267280492,
          "upper": 1413.2508488459123,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1379.3370466399917,
          "normalized_0_100": 78.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 140,
          "lower": 1368.972300895999,
          "upper": 1389.7017923839844,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1349.3436538610258,
          "normalized_0_100": 75.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 155,
          "lower": 1343.8769519620228,
          "upper": 1354.8103557600289,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "reflection-llama-3-1-70b",
      "display_name": "Reflection-Llama-3.1-70B",
      "provider": null,
      "aliases": [
        "Reflection-Llama-3.1-70B"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 75,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 75,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.75,
          "normalized_0_100": 75,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-sonnet-4-20250514-thinking-32k",
      "display_name": "claude-sonnet-4-20250514-thinking-32k",
      "provider": "anthropic",
      "aliases": [
        "claude-sonnet-4-20250514-thinking-32k"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.96,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 74.96,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1352.6512151365291,
          "normalized_0_100": 75.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 157,
          "lower": 1338.442190929815,
          "upper": 1366.8602393432434,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1413.0767103751589,
          "normalized_0_100": 82.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 105,
          "lower": 1405.2176133238897,
          "upper": 1420.935807426428,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1348.333444378247,
          "normalized_0_100": 74.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 156,
          "lower": 1343.8833409724416,
          "upper": 1352.7835477840526,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "multilingual-multimodal-nlp-industrialcoder",
      "display_name": "Multilingual-Multimodal-NLP/IndustrialCoder",
      "provider": "Multilingual-Multimodal-NLP",
      "aliases": [
        "Multilingual-Multimodal-NLP/IndustrialCoder"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 32,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 74.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 74.8,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 74.8,
          "normalized_0_100": 74.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 74.8,
          "normalized_0_100": 74.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "trinity-large-thinking",
      "display_name": "trinity-large-thinking",
      "provider": null,
      "aliases": [
        "trinity-large-thinking"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.77,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 74.77,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1384.1864711725366,
          "normalized_0_100": 79.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 138,
          "lower": 1364.2679401407756,
          "upper": 1404.1050022042975,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1366.7287629795499,
          "normalized_0_100": 77.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 155,
          "lower": 1357.2066041004523,
          "upper": 1376.2509218586472,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1346.7133082870914,
          "normalized_0_100": 74.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 157,
          "lower": 1341.3398512900626,
          "upper": 1352.0867652841202,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimax-m1",
      "display_name": "minimax-m1",
      "provider": "minimax",
      "aliases": [
        "minimax-m1"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.26,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 74.26,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1359.4158957665118,
          "normalized_0_100": 76.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 151,
          "lower": 1345.346701902627,
          "upper": 1373.4850896303965,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1359.1582115681051,
          "normalized_0_100": 76.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 166,
          "lower": 1351.575438374674,
          "upper": 1366.7409847615363,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1342.3817989451952,
          "normalized_0_100": 74.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 158,
          "lower": 1338.1362016494131,
          "upper": 1346.6273962409773,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimax-m2",
      "display_name": "minimax-m2",
      "provider": "minimax",
      "aliases": [
        "minimax-m2"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.22,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 74.22,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1384.1919642121347,
          "normalized_0_100": 79.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 137,
          "lower": 1348.8225323094857,
          "upper": 1419.5613961147837,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1373.9358081549262,
          "normalized_0_100": 78.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 146,
          "lower": 1358.931989841707,
          "upper": 1388.9396264681454,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1342.1107785634408,
          "normalized_0_100": 74.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 159,
          "lower": 1334.4347962905072,
          "upper": 1349.7867608363745,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-1-mini-2025-04-14",
      "display_name": "gpt-4.1-mini-2025-04-14",
      "provider": "openai",
      "aliases": [
        "gpt-4.1-mini-2025-04-14"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 74.03,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 74.03,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1329.0389029261864,
          "normalized_0_100": 72.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 170,
          "lower": 1315.677231802703,
          "upper": 1342.4005740496698,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1367.5199218883515,
          "normalized_0_100": 77.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 152,
          "lower": 1360.0765031894537,
          "upper": 1374.9633405872491,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1340.4720160507477,
          "normalized_0_100": 74.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 160,
          "lower": 1336.2041672039923,
          "upper": 1344.7398648975031,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-32b",
      "display_name": "qwen3-32b",
      "provider": "alibaba",
      "aliases": [
        "qwen3-32b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.98,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.98,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1357.6555099963057,
          "normalized_0_100": 76.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 152,
          "lower": 1319.9320622442297,
          "upper": 1395.378957748382,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1358.1281519457166,
          "normalized_0_100": 76.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 167,
          "lower": 1333.8757643818815,
          "upper": 1382.380539509552,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1340.0222798303794,
          "normalized_0_100": 73.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 161,
          "lower": 1330.7017388637637,
          "upper": 1349.3428207969948,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-small-2506",
      "display_name": "mistral-small-2506",
      "provider": "mistral",
      "aliases": [
        "mistral-small-2506"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.86,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.86,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1339.4551138112245,
          "normalized_0_100": 73.91,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 164,
          "lower": 1319.5599888299157,
          "upper": 1359.3502387925334,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1363.1538695289228,
          "normalized_0_100": 76.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 160,
          "lower": 1353.1113025538189,
          "upper": 1373.1964365040265,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1339.0494776954301,
          "normalized_0_100": 73.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 162,
          "lower": 1333.8840493502244,
          "upper": 1344.214906040636,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-sonnet-4-20250514",
      "display_name": "claude-sonnet-4-20250514",
      "provider": "anthropic",
      "aliases": [
        "claude-sonnet-4-20250514"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.7,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.7,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1343.5048162862288,
          "normalized_0_100": 74.39,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 163,
          "lower": 1330.3637294989853,
          "upper": 1356.6459030734723,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1379.6996995116692,
          "normalized_0_100": 78.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 139,
          "lower": 1372.3320310698136,
          "upper": 1387.0673679535246,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1337.6648664258003,
          "normalized_0_100": 73.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 164,
          "lower": 1333.2768629585516,
          "upper": 1342.0528698930489,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-llama-3-3-nemotron-super-49b-v1-5",
      "display_name": "nvidia-llama-3.3-nemotron-super-49b-v1.5",
      "provider": "nvidia",
      "aliases": [
        "nvidia-llama-3.3-nemotron-super-49b-v1.5"
      ],
      "openness": null,
      "license": "Nvidia Open",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.7,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.7,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1356.6463751926199,
          "normalized_0_100": 75.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 168,
          "lower": 1334.7363137064774,
          "upper": 1378.556436678762,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1337.7085224688278,
          "normalized_0_100": 73.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 163,
          "lower": 1327.738932273387,
          "upper": 1347.6781126642684,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-397b-a17b",
      "display_name": "Qwen/Qwen3.5-397B-A17B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-397B-A17B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 403.4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 73.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 73.57,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 93.33,
          "normalized_0_100": 93.33,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 88.4,
          "normalized_0_100": 88.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 28.7,
          "normalized_0_100": 28.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 87.88,
          "normalized_0_100": 87.88,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 87.8,
          "normalized_0_100": 87.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 73.57,
          "normalized_0_100": 73.57,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 76.4,
          "normalized_0_100": 76.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 52.5,
          "normalized_0_100": 52.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o3-mini-high",
      "display_name": "o3-mini-high",
      "provider": "openai",
      "aliases": [
        "o3-mini-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.56,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.56,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1378.375595308978,
          "normalized_0_100": 78.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 144,
          "lower": 1361.1845516415742,
          "upper": 1395.566638976382,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1378.6962771648514,
          "normalized_0_100": 78.58,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 141,
          "lower": 1367.1224312925783,
          "upper": 1390.2701230371244,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1336.5598839514857,
          "normalized_0_100": 73.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 165,
          "lower": 1331.3344016214996,
          "upper": 1341.7853662814719,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "step-1o-turbo-202506",
      "display_name": "step-1o-turbo-202506",
      "provider": "stepfun",
      "aliases": [
        "step-1o-turbo-202506"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.38,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.38,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1362.4266213878818,
          "normalized_0_100": 76.64,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 150,
          "lower": 1335.0924497941107,
          "upper": 1389.7607929816531,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1340.3547927650495,
          "normalized_0_100": 74.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 175,
          "lower": 1325.5445735307171,
          "upper": 1355.1650119993822,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1335.0433456830078,
          "normalized_0_100": 73.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 166,
          "lower": 1328.230502036413,
          "upper": 1341.8561893296028,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "trinity-large-preview",
      "display_name": "trinity-large-preview",
      "provider": null,
      "aliases": [
        "trinity-large-preview"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.24,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.24,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1333.7703916390692,
          "normalized_0_100": 73.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 167,
          "lower": 1315.2389908052592,
          "upper": 1352.3017924728792,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1374.917537332063,
          "normalized_0_100": 78.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 144,
          "lower": 1366.747187281898,
          "upper": 1383.087887382228,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1333.8662603626763,
          "normalized_0_100": 73.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 168,
          "lower": 1329.335694865416,
          "upper": 1338.3968258599364,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-5v",
      "display_name": "glm-4.5v",
      "provider": "zai",
      "aliases": [
        "glm-4.5v"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 73.22,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 73.22,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1326.10120279805,
          "normalized_0_100": 72.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 171,
          "lower": 1284.4508617603424,
          "upper": 1367.7515438357573,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1349.8725073859243,
          "normalized_0_100": 75.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 171,
          "lower": 1331.4883519818604,
          "upper": 1368.2566627899882,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1333.679786251105,
          "normalized_0_100": 73.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 169,
          "lower": 1325.3550438161024,
          "upper": 1342.0045286861077,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ring-flash-2-0",
      "display_name": "ring-flash-2.0",
      "provider": "ant-group",
      "aliases": [
        "ring-flash-2.0"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 72.97,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 72.97,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1405.4892541798008,
          "normalized_0_100": 81.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 122,
          "lower": 1374.6219085304635,
          "upper": 1436.356599829138,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1365.7414098168338,
          "normalized_0_100": 77.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 157,
          "lower": 1351.0108483349927,
          "upper": 1380.4719712986746,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1331.5657238738836,
          "normalized_0_100": 72.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 171,
          "lower": 1324.4211874020998,
          "upper": 1338.7102603456674,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-plus-0111",
      "display_name": "glm-4-plus-0111",
      "provider": "zai",
      "aliases": [
        "glm-4-plus-0111"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 72.89,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 72.89,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1388.349843333519,
          "normalized_0_100": 79.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 135,
          "lower": 1358.5291733414117,
          "upper": 1418.170513325626,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1287.1550462014397,
          "normalized_0_100": 67.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 213,
          "lower": 1269.1211401686246,
          "upper": 1305.1889522342547,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1330.853470143252,
          "normalized_0_100": 72.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 173,
          "lower": 1322.462851061299,
          "upper": 1339.244089225205,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "lm-provers-qed-nano",
      "display_name": "lm-provers/QED-Nano",
      "provider": "lm-provers",
      "aliases": [
        "lm-provers/QED-Nano"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 72.69,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 72.69,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 82.5,
          "normalized_0_100": 82.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 62.88,
          "normalized_0_100": 62.88,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 72.69,
          "normalized_0_100": 72.69,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-plus-0125",
      "display_name": "qwen-plus-0125",
      "provider": "alibaba",
      "aliases": [
        "qwen-plus-0125"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 72.36,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 72.36,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1346.6514187245193,
          "normalized_0_100": 74.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 160,
          "lower": 1319.1576845297498,
          "upper": 1374.1451529192886,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1328.559546462814,
          "normalized_0_100": 72.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 180,
          "lower": 1310.3038324818194,
          "upper": 1346.8152604438087,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1326.4283827893064,
          "normalized_0_100": 72.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 176,
          "lower": 1318.1364455197213,
          "upper": 1334.7203200588913,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-flash",
      "display_name": "gemini-2.0-flash",
      "provider": null,
      "aliases": [
        "gemini-2.0-flash"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 72.12,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 72.12,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.721212,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.721212,
          "normalized_0_100": 72.12,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.163636,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "step-2-16k-exp-202412",
      "display_name": "step-2-16k-exp-202412",
      "provider": "stepfun",
      "aliases": [
        "step-2-16k-exp-202412"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.71,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.71,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1330.1782362507056,
          "normalized_0_100": 72.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 169,
          "lower": 1297.8431498694777,
          "upper": 1362.5133226319335,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1316.726209605154,
          "normalized_0_100": 71.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 188,
          "lower": 1297.0021036157987,
          "upper": 1336.4503155945097,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1320.9593373498044,
          "normalized_0_100": 71.71,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 177,
          "lower": 1312.4084576564514,
          "upper": 1329.5102170431574,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-5-nano-high",
      "display_name": "gpt-5-nano-high",
      "provider": "openai",
      "aliases": [
        "gpt-5-nano-high"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.6,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1351.8128224315674,
          "normalized_0_100": 75.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 158,
          "lower": 1322.1634845162948,
          "upper": 1381.4621603468397,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1351.4547636487816,
          "normalized_0_100": 75.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 170,
          "lower": 1336.9247910662575,
          "upper": 1365.9847362313058,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1320.032749928244,
          "normalized_0_100": 71.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 178,
          "lower": 1313.2332753616158,
          "upper": 1326.8322244948722,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "moonshotai-kimi-k2-5",
      "display_name": "moonshotai/Kimi-K2.5",
      "provider": "moonshotai",
      "aliases": [
        "moonshotai/Kimi-K2.5"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1058.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 71.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 71.57,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 95.83,
          "normalized_0_100": 95.83,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 87.6,
          "normalized_0_100": 87.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 50.2,
          "normalized_0_100": 50.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 87.12,
          "normalized_0_100": 87.12,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 87.1,
          "normalized_0_100": 87.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 71.57,
          "normalized_0_100": 71.57,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 50.7,
          "normalized_0_100": 50.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 70.8,
          "normalized_0_100": 70.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 43.2,
          "normalized_0_100": 43.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-turbos-20250226",
      "display_name": "hunyuan-turbos-20250226",
      "provider": "tencent",
      "aliases": [
        "hunyuan-turbos-20250226"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.56,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.56,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1323.2764407489574,
          "normalized_0_100": 71.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 175,
          "lower": 1283.8848003446465,
          "upper": 1362.6680811532683,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1343.0760091596335,
          "normalized_0_100": 74.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 173,
          "lower": 1312.3223333545905,
          "upper": 1373.8296849646767,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1319.7323847234306,
          "normalized_0_100": 71.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 179,
          "lower": 1307.9988725479363,
          "upper": 1331.4658968989247,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-nemotron-ultra-253b-v1",
      "display_name": "llama-3.1-nemotron-ultra-253b-v1",
      "provider": "nvidia",
      "aliases": [
        "llama-3.1-nemotron-ultra-253b-v1"
      ],
      "openness": null,
      "license": "Nvidia Open Model",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.53,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.53,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1312.4259111188226,
          "normalized_0_100": 70.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 192,
          "lower": 1282.68524551178,
          "upper": 1342.1665767258648,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1319.4402640409323,
          "normalized_0_100": 71.53,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 180,
          "lower": 1307.900016731062,
          "upper": 1330.9805113508028,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o3-mini",
      "display_name": "o3-mini",
      "provider": "openai",
      "aliases": [
        "o3-mini"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.47,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.47,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1323.6775402798298,
          "normalized_0_100": 72.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 173,
          "lower": 1313.2678860164488,
          "upper": 1334.087194543211,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1362.156654718833,
          "normalized_0_100": 76.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 162,
          "lower": 1355.7606761993534,
          "upper": 1368.5526332383124,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1318.9856166296186,
          "normalized_0_100": 71.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 182,
          "lower": 1315.477282844818,
          "upper": 1322.493950414419,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "stepfun-ai-step-3-5-flash",
      "display_name": "stepfun-ai/Step-3.5-Flash",
      "provider": "stepfun-ai",
      "aliases": [
        "stepfun-ai/Step-3.5-Flash"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 199.4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 71.35,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 71.35,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 96.67,
          "normalized_0_100": 96.67,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 83.5,
          "normalized_0_100": 83.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 23.1,
          "normalized_0_100": 23.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 86.36,
          "normalized_0_100": 86.36,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 84.4,
          "normalized_0_100": 84.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 71.35,
          "normalized_0_100": 71.35,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 74.4,
          "normalized_0_100": 74.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 51,
          "normalized_0_100": 51,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen3-30b-a3b",
      "display_name": "qwen3-30b-a3b",
      "provider": "alibaba",
      "aliases": [
        "qwen3-30b-a3b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.26,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.26,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1355.7580371067472,
          "normalized_0_100": 75.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 156,
          "lower": 1339.140067606782,
          "upper": 1372.3760066067125,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1337.9355216197955,
          "normalized_0_100": 73.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 177,
          "lower": 1329.1547133741637,
          "upper": 1346.7163298654273,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1317.1421725936545,
          "normalized_0_100": 71.26,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 183,
          "lower": 1312.450860432859,
          "upper": 1321.8334847544502,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-mini",
      "display_name": "o1-mini",
      "provider": "openai",
      "aliases": [
        "o1-mini"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 71.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 71.25,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1313.2863931298727,
          "normalized_0_100": 70.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 179,
          "lower": 1303.30039002783,
          "upper": 1323.2723962319155,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1362.8216154528477,
          "normalized_0_100": 76.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 161,
          "lower": 1355.5020678061212,
          "upper": 1370.1411630995744,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1317.1172406803225,
          "normalized_0_100": 71.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 184,
          "lower": 1313.5133735042184,
          "upper": 1320.7211078564264,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistralai-mistral-small-4-119b-2603",
      "display_name": "mistralai/Mistral-Small-4-119B-2603",
      "provider": "mistralai",
      "aliases": [
        "mistralai/Mistral-Small-4-119B-2603"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 119.4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 71.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 71.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 71.2,
          "normalized_0_100": 71.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 71.2,
          "normalized_0_100": 71.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-7-sonnet-20250219-thinking-32k",
      "display_name": "claude-3-7-sonnet-20250219-thinking-32k",
      "provider": "anthropic",
      "aliases": [
        "claude-3-7-sonnet-20250219-thinking-32k"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 70.93,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 70.93,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1306.1435167041648,
          "normalized_0_100": 69.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 183,
          "lower": 1292.5123163504047,
          "upper": 1319.7747170579246,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1361.4582639279247,
          "normalized_0_100": 76.53,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 163,
          "lower": 1353.5747497867997,
          "upper": 1369.3417780690495,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1314.4080184683849,
          "normalized_0_100": 70.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 185,
          "lower": 1310.2061787980238,
          "upper": 1318.609858138746,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "olmo-3-1-32b-instruct",
      "display_name": "olmo-3.1-32b-instruct",
      "provider": "allenai",
      "aliases": [
        "olmo-3.1-32b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 70.66,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 70.66,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1298.221539776242,
          "normalized_0_100": 69.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 186,
          "lower": 1271.3977934160912,
          "upper": 1325.0452861363929,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1349.4404549472372,
          "normalized_0_100": 75.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 172,
          "lower": 1337.6683583352146,
          "upper": 1361.2125515592597,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1312.0930963399178,
          "normalized_0_100": 70.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 186,
          "lower": 1306.0410860642342,
          "upper": 1318.1451066156014,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zai-org-glm-5",
      "display_name": "zai-org/GLM-5",
      "provider": "zai-org",
      "aliases": [
        "zai-org/GLM-5"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 753.9,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 70.65,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 70.65,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 95.83,
          "normalized_0_100": 95.83,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 86,
          "normalized_0_100": 86,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 30.5,
          "normalized_0_100": 30.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 86.36,
          "normalized_0_100": 86.36,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 70.65,
          "normalized_0_100": 70.65,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 72.8,
          "normalized_0_100": 72.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 52.4,
          "normalized_0_100": 52.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-turbo-0110",
      "display_name": "hunyuan-turbo-0110",
      "provider": "tencent",
      "aliases": [
        "hunyuan-turbo-0110"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 70.58,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 70.58,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1357.3762581411022,
          "normalized_0_100": 76.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 154,
          "lower": 1317.9420724337092,
          "upper": 1396.8104438484952,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1319.8636586893908,
          "normalized_0_100": 71.58,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 186,
          "lower": 1290.2830756692797,
          "upper": 1349.4442417095017,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1311.423566810358,
          "normalized_0_100": 70.58,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 187,
          "lower": 1299.961335416398,
          "upper": 1322.8857982043173,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-3-nemotron-49b-super-v1",
      "display_name": "llama-3.3-nemotron-49b-super-v1",
      "provider": "nvidia",
      "aliases": [
        "llama-3.3-nemotron-49b-super-v1"
      ],
      "openness": null,
      "license": "Nvidia",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 70.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 70.21,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1276.6359314911924,
          "normalized_0_100": 66.44,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 192,
          "lower": 1237.3206413784737,
          "upper": 1315.9512216039107,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1296.1352757032832,
          "normalized_0_100": 68.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 204,
          "lower": 1265.2560045825321,
          "upper": 1327.0145468240344,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1308.3776553666767,
          "normalized_0_100": 70.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 188,
          "lower": 1296.297563982989,
          "upper": 1320.457746750364,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "fireredteam-firered-ocr",
      "display_name": "FireRedTeam/FireRed-OCR",
      "provider": "FireRedTeam",
      "aliases": [
        "FireRedTeam/FireRed-OCR"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 2.1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 70.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 70.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 70.2,
          "normalized_0_100": 70.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 70.2,
          "normalized_0_100": 70.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-3n-e4b-it",
      "display_name": "gemma-3n-e4b-it",
      "provider": "google",
      "aliases": [
        "gemma-3n-e4b-it"
      ],
      "openness": null,
      "license": "Gemma",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.96,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 69.96,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1308.4124968664687,
          "normalized_0_100": 70.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 182,
          "lower": 1291.0772889842524,
          "upper": 1325.747704748685,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1269.7623539804524,
          "normalized_0_100": 65.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 225,
          "lower": 1259.5838205486261,
          "upper": 1279.9408874122787,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1306.2058849239888,
          "normalized_0_100": 69.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 189,
          "lower": 1301.0886586223605,
          "upper": 1311.3231112256171,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-2-2024-08-13",
      "display_name": "grok-2-2024-08-13",
      "provider": "xai",
      "aliases": [
        "grok-2-2024-08-13"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.77,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 69.77,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1288.3979957168021,
          "normalized_0_100": 67.84,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 190,
          "lower": 1279.0976098313572,
          "upper": 1297.698381602247,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1287.969038869359,
          "normalized_0_100": 67.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 212,
          "lower": 1281.1139218448634,
          "upper": 1294.8241558938546,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1304.6245055850504,
          "normalized_0_100": 69.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 190,
          "lower": 1300.9940477361042,
          "upper": 1308.254963433997,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nanonets-nanonets-ocr2-3b",
      "display_name": "nanonets/Nanonets-OCR2-3B",
      "provider": "nanonets",
      "aliases": [
        "nanonets/Nanonets-OCR2-3B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 69.5,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 69.5,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "olm_ocr",
          "value": 69.5,
          "normalized_0_100": 69.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 69.5,
          "normalized_0_100": 69.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "yi-lightning",
      "display_name": "yi-lightning",
      "provider": null,
      "aliases": [
        "yi-lightning"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 69.4,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1318.9159321073128,
          "normalized_0_100": 71.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 177,
          "lower": 1306.7241280847766,
          "upper": 1331.1077361298492,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1312.878222642832,
          "normalized_0_100": 70.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 190,
          "lower": 1303.275963589574,
          "upper": 1322.4804816960902,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1301.5705726279164,
          "normalized_0_100": 69.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 191,
          "lower": 1296.6865645126634,
          "upper": 1306.4545807431693,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "claude-3-7-sonnet-20250219",
      "display_name": "claude-3-7-sonnet-20250219",
      "provider": "anthropic",
      "aliases": [
        "claude-3-7-sonnet-20250219"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.12,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 69.12,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1298.6122956339748,
          "normalized_0_100": 69.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 185,
          "lower": 1285.771330960238,
          "upper": 1311.4532603077114,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1338.9891441382356,
          "normalized_0_100": 73.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 176,
          "lower": 1331.6069108313086,
          "upper": 1346.3713774451626,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1299.1644081266222,
          "normalized_0_100": 69.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 194,
          "lower": 1295.2255220281659,
          "upper": 1303.1032942250786,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-5-plus-1127",
      "display_name": "qwen2.5-plus-1127",
      "provider": "alibaba",
      "aliases": [
        "qwen2.5-plus-1127"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.12,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 69.12,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1312.9873173797619,
          "normalized_0_100": 70.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 180,
          "lower": 1290.4973660825433,
          "upper": 1335.4772686769804,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1314.7618104226701,
          "normalized_0_100": 70.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 189,
          "lower": 1300.9478510116476,
          "upper": 1328.5757698336924,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1299.1959687540016,
          "normalized_0_100": 69.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 193,
          "lower": 1292.8809378610686,
          "upper": 1305.5109996469346,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "olmo-3-32b-think",
      "display_name": "olmo-3-32b-think",
      "provider": "allenai",
      "aliases": [
        "olmo-3-32b-think"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 69.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 69.06,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1298.1506301382076,
          "normalized_0_100": 69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 187,
          "lower": 1259.7410750925314,
          "upper": 1336.5601851838837,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1320.840736609675,
          "normalized_0_100": 71.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 185,
          "lower": 1302.5823069076168,
          "upper": 1339.0991663117334,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1298.7025518617402,
          "normalized_0_100": 69.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 195,
          "lower": 1290.5284617097402,
          "upper": 1306.87664201374,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-4b-thinking-2507",
      "display_name": "Qwen/Qwen3-4B-Thinking-2507",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-4B-Thinking-2507"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 68.83,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 68.83,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 82.5,
          "normalized_0_100": 82.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 65.8,
          "normalized_0_100": 65.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 53.03,
          "normalized_0_100": 53.03,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 74,
          "normalized_0_100": 74,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 68.83,
          "normalized_0_100": 68.83,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-27b",
      "display_name": "Qwen/Qwen3.5-27B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-27B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 27.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 68.83,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 68.83,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 90.83,
          "normalized_0_100": 90.83,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 85.5,
          "normalized_0_100": 85.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 24.3,
          "normalized_0_100": 24.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 81.06,
          "normalized_0_100": 81.06,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 86.1,
          "normalized_0_100": 86.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 68.83,
          "normalized_0_100": 68.83,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 72.4,
          "normalized_0_100": 72.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 41.6,
          "normalized_0_100": 41.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "molmo-2-8b",
      "display_name": "molmo-2-8b",
      "provider": "allenai",
      "aliases": [
        "molmo-2-8b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 68.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 68.57,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1294.576924071958,
          "normalized_0_100": 68.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 197,
          "lower": 1273.5967421949113,
          "upper": 1315.5571059490048,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v2-5-1210",
      "display_name": "deepseek-v2.5-1210",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v2.5-1210"
      ],
      "openness": null,
      "license": "DeepSeek",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 68.5,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 68.5,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1316.6005491674719,
          "normalized_0_100": 71.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 178,
          "lower": 1290.5447505874365,
          "upper": 1342.6563477475072,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1309.9456960389903,
          "normalized_0_100": 70.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 194,
          "lower": 1293.178243045913,
          "upper": 1326.7131490320678,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1293.9906769420013,
          "normalized_0_100": 68.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 198,
          "lower": 1285.7677062154519,
          "upper": 1302.2136476685507,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "granite-4-1-8b",
      "display_name": "granite-4.1-8b",
      "provider": "ibm",
      "aliases": [
        "granite-4.1-8b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 68.39,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 68.39,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1309.2207948375535,
          "normalized_0_100": 70.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 195,
          "lower": 1287.9189456795734,
          "upper": 1330.5226439955336,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1293.0038774459083,
          "normalized_0_100": 68.39,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 199,
          "lower": 1282.3697555996544,
          "upper": 1303.637999292162,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "athene-v2-chat",
      "display_name": "athene-v2-chat",
      "provider": null,
      "aliases": [
        "athene-v2-chat"
      ],
      "openness": null,
      "license": "NexusFlow",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 68.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 68.21,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1301.0151422886113,
          "normalized_0_100": 69.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 184,
          "lower": 1287.2314216174,
          "upper": 1314.7988629598226,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1312.6858750606002,
          "normalized_0_100": 70.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 191,
          "lower": 1303.6511290466135,
          "upper": 1321.7206210745867,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1291.5048568686182,
          "normalized_0_100": 68.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 200,
          "lower": 1286.9889031093012,
          "upper": 1296.0208106279351,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-35b-a3b",
      "display_name": "Qwen/Qwen3.5-35B-A3B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-35B-A3B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 36,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 68.11,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 68.11,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 93.33,
          "normalized_0_100": 93.33,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 84.2,
          "normalized_0_100": 84.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 22.4,
          "normalized_0_100": 22.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 81.82,
          "normalized_0_100": 81.82,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 85.3,
          "normalized_0_100": 85.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 68.11,
          "normalized_0_100": 68.11,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 69.2,
          "normalized_0_100": 69.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 40.5,
          "normalized_0_100": 40.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-plus",
      "display_name": "glm-4-plus",
      "provider": "zai",
      "aliases": [
        "glm-4-plus"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.99,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.99,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1294.927516560106,
          "normalized_0_100": 68.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 188,
          "lower": 1282.175371188869,
          "upper": 1307.679661931343,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1290.9682390655064,
          "normalized_0_100": 68.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 208,
          "lower": 1281.6960850835658,
          "upper": 1300.2403930474468,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1289.6926598633079,
          "normalized_0_100": 67.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 202,
          "lower": 1284.80719363741,
          "upper": 1294.5781260892059,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-large-2025-02-10",
      "display_name": "hunyuan-large-2025-02-10",
      "provider": "tencent",
      "aliases": [
        "hunyuan-large-2025-02-10"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.79,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.79,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1345.6850229157403,
          "normalized_0_100": 74.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 162,
          "lower": 1312.9921079392893,
          "upper": 1378.377937892191,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1307.2260334895564,
          "normalized_0_100": 70.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 197,
          "lower": 1282.8077827455766,
          "upper": 1331.6442842335364,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1287.969331840191,
          "normalized_0_100": 67.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 203,
          "lower": 1278.2174813032543,
          "upper": 1297.7211823771277,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-oss-20b",
      "display_name": "gpt-oss-20b",
      "provider": "openai",
      "aliases": [
        "gpt-oss-20b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.72,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.72,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1310.7868716553185,
          "normalized_0_100": 70.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 181,
          "lower": 1283.962663153579,
          "upper": 1337.6110801570578,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1308.236202815051,
          "normalized_0_100": 70.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 196,
          "lower": 1295.246170906021,
          "upper": 1321.2262347240812,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1287.4281765375306,
          "normalized_0_100": 67.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 205,
          "lower": 1281.0878979284948,
          "upper": 1293.768455146566,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-1-nano-2025-04-14",
      "display_name": "gpt-4.1-nano-2025-04-14",
      "provider": "openai",
      "aliases": [
        "gpt-4.1-nano-2025-04-14"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.4,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1270.6189570735128,
          "normalized_0_100": 65.72,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 199,
          "lower": 1234.5019782571835,
          "upper": 1306.7359358898423,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1306.243117894723,
          "normalized_0_100": 69.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 200,
          "lower": 1286.9293355916338,
          "upper": 1325.556900197812,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1284.7480900901046,
          "normalized_0_100": 67.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 208,
          "lower": 1277.1067785673029,
          "upper": 1292.389401612906,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-405b-instruct-bf16",
      "display_name": "llama-3.1-405b-instruct-bf16",
      "provider": "meta",
      "aliases": [
        "llama-3.1-405b-instruct-bf16"
      ],
      "openness": null,
      "license": "Llama 3.1 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.28,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.28,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1241.3732839136164,
          "normalized_0_100": 62.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 215,
          "lower": 1230.5042688969784,
          "upper": 1252.2422989302547,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1291.8483463270752,
          "normalized_0_100": 68.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 207,
          "lower": 1284.426326734585,
          "upper": 1299.2703659195654,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1283.716079820117,
          "normalized_0_100": 67.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 209,
          "lower": 1280.0580691522266,
          "upper": 1287.3740904880074,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mercury",
      "display_name": "mercury",
      "provider": "inception-ai",
      "aliases": [
        "mercury"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.1,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1326.1007049844532,
          "normalized_0_100": 72.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 181,
          "lower": 1297.2197205679247,
          "upper": 1354.9816894009818,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1282.209897531445,
          "normalized_0_100": 67.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 212,
          "lower": 1268.4153322888228,
          "upper": 1296.0044627740672,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-max-0919",
      "display_name": "qwen-max-0919",
      "provider": "alibaba",
      "aliases": [
        "qwen-max-0919"
      ],
      "openness": null,
      "license": "Qwen",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.07,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.07,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1253.044180263042,
          "normalized_0_100": 63.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 206,
          "lower": 1237.542746391921,
          "upper": 1268.5456141341633,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1288.5907955849736,
          "normalized_0_100": 67.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 210,
          "lower": 1277.476470889943,
          "upper": 1299.7051202800044,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1281.9197447528966,
          "normalized_0_100": 67.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 213,
          "lower": 1276.2543775776307,
          "upper": 1287.5851119281624,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-405b-instruct-fp8",
      "display_name": "llama-3.1-405b-instruct-fp8",
      "provider": "meta",
      "aliases": [
        "llama-3.1-405b-instruct-fp8"
      ],
      "openness": null,
      "license": "Llama 3.1 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 67.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 67.06,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1233.4595154334302,
          "normalized_0_100": 61.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 224,
          "lower": 1223.9570059305368,
          "upper": 1242.9620249363236,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1284.034717922998,
          "normalized_0_100": 67.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 216,
          "lower": 1276.971332799837,
          "upper": 1291.0981030461594,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1281.8510239010643,
          "normalized_0_100": 67.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 214,
          "lower": 1278.3050060677265,
          "upper": 1285.3970417344021,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-2-mini-2024-08-13",
      "display_name": "grok-2-mini-2024-08-13",
      "provider": "xai",
      "aliases": [
        "grok-2-mini-2024-08-13"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.93,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.93,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1261.4807617485951,
          "normalized_0_100": 64.64,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 204,
          "lower": 1251.6926459382457,
          "upper": 1271.2688775589447,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1269.0883140883634,
          "normalized_0_100": 65.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 226,
          "lower": 1261.966251414744,
          "upper": 1276.2103767619828,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1280.7418585769115,
          "normalized_0_100": 66.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 216,
          "lower": 1277.066232647444,
          "upper": 1284.417484506379,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-4-scout-17b-16e-instruct",
      "display_name": "llama-4-scout-17b-16e-instruct",
      "provider": "meta",
      "aliases": [
        "llama-4-scout-17b-16e-instruct"
      ],
      "openness": null,
      "license": "Llama",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.93,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.93,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1252.802648241046,
          "normalized_0_100": 63.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 207,
          "lower": 1237.1878635357625,
          "upper": 1268.4174329463294,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1286.2781392689812,
          "normalized_0_100": 67.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 215,
          "lower": 1277.7700821943263,
          "upper": 1294.7861963436362,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1280.7389759363596,
          "normalized_0_100": 66.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 217,
          "lower": 1276.0645849383664,
          "upper": 1285.4133669343526,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "moonshotai-kimi-k2-instruct-0905",
      "display_name": "moonshotai/Kimi-K2-Instruct-0905",
      "provider": "moonshotai",
      "aliases": [
        "moonshotai/Kimi-K2-Instruct-0905"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1026.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 66.68,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 66.68,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "evasion_bench",
          "value": 66.68,
          "normalized_0_100": 66.68,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 66.68,
          "normalized_0_100": 66.68,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "command-r-plus-04-2024",
      "display_name": "command-r-plus-04-2024",
      "provider": null,
      "aliases": [
        "command-r-plus-04-2024"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 66.67,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 66.67,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": 66.67,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-advanced-0514",
      "display_name": "gemini-advanced-0514",
      "provider": "google",
      "aliases": [
        "gemini-advanced-0514"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.67,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.67,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1273.0263012395108,
          "normalized_0_100": 66.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 196,
          "lower": 1261.9774811612588,
          "upper": 1284.0751213177628,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1255.3127111682074,
          "normalized_0_100": 63.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 234,
          "lower": 1246.3926368539046,
          "upper": 1264.23278548251,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1278.6030909203969,
          "normalized_0_100": 66.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 218,
          "lower": 1273.5104933820548,
          "upper": 1283.6956884587391,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-small-3-1-24b-instruct-2503",
      "display_name": "mistral-small-3.1-24b-instruct-2503",
      "provider": "mistral",
      "aliases": [
        "mistral-small-3.1-24b-instruct-2503"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.57,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1246.0305642683827,
          "normalized_0_100": 62.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 212,
          "lower": 1230.6918964479914,
          "upper": 1261.369232088774,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1310.0421144348802,
          "normalized_0_100": 70.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 193,
          "lower": 1302.1633726697128,
          "upper": 1317.9208562000476,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1277.7417764151094,
          "normalized_0_100": 66.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 219,
          "lower": 1273.249002974537,
          "upper": 1282.234549855682,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-chat",
      "display_name": "deepseek-chat",
      "provider": null,
      "aliases": [
        "deepseek-chat"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 66.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 66.25,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.75,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.6625,
          "normalized_0_100": 66.25,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-3-70b-instruct",
      "display_name": "llama-3.3-70b-instruct",
      "provider": "meta",
      "aliases": [
        "llama-3.3-70b-instruct"
      ],
      "openness": null,
      "license": "Llama-3.3",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.25,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1215.4541629565617,
          "normalized_0_100": 59.16,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 237,
          "lower": 1204.6567860609098,
          "upper": 1226.2515398522137,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1268.9146400028408,
          "normalized_0_100": 65.52,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 227,
          "lower": 1262.445603804825,
          "upper": 1275.3836762008564,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1275.0550842314874,
          "normalized_0_100": 66.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 220,
          "lower": 1271.6206254632802,
          "upper": 1278.4895429996945,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-standard-2025-02-10",
      "display_name": "hunyuan-standard-2025-02-10",
      "provider": "tencent",
      "aliases": [
        "hunyuan-standard-2025-02-10"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 66.13,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 66.13,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1318.938073614825,
          "normalized_0_100": 71.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 176,
          "lower": 1287.9314605546697,
          "upper": 1349.9446866749802,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1270.1931666802873,
          "normalized_0_100": 65.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 224,
          "lower": 1246.5388798353279,
          "upper": 1293.8474535252467,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1274.0146166612342,
          "normalized_0_100": 66.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 221,
          "lower": 1264.3511077233493,
          "upper": 1283.678125599119,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gair-openswe-72b",
      "display_name": "GAIR/OpenSWE-72B",
      "provider": "GAIR",
      "aliases": [
        "GAIR/OpenSWE-72B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 72.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 66,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 66,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 66,
          "normalized_0_100": 66,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 66,
          "normalized_0_100": 66,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-flash-8b-exp-0924",
      "display_name": "gemini-1.5-flash-8b-exp-0924",
      "provider": null,
      "aliases": [
        "gemini-1.5-flash-8b-exp-0924"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 65.88,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 65.88,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.617647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.658824,
          "normalized_0_100": 65.88,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.235294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v2-5",
      "display_name": "deepseek-v2.5",
      "provider": "deepseek",
      "aliases": [
        "deepseek-v2.5"
      ],
      "openness": null,
      "license": "DeepSeek",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 65.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 65.8,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1280.3503963744802,
          "normalized_0_100": 66.88,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 191,
          "lower": 1267.4626094086289,
          "upper": 1293.2381833403315,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1301.8395393855626,
          "normalized_0_100": 69.44,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 202,
          "lower": 1292.555681632109,
          "upper": 1311.123397139016,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1271.291274897453,
          "normalized_0_100": 65.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 224,
          "lower": 1266.6264417793882,
          "upper": 1275.9561080155177,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-4b-instruct-2507",
      "display_name": "Qwen/Qwen3-4B-Instruct-2507",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-4B-Instruct-2507"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 65.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 65.8,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 62,
          "normalized_0_100": 62,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 69.6,
          "normalized_0_100": 69.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 65.8,
          "normalized_0_100": 65.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "olmo-3-1-32b-think",
      "display_name": "olmo-3.1-32b-think",
      "provider": "allenai",
      "aliases": [
        "olmo-3.1-32b-think"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 65.67,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 65.67,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1251.6759538578922,
          "normalized_0_100": 63.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 208,
          "lower": 1223.2139697105513,
          "upper": 1280.1379380052333,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1288.4638417185133,
          "normalized_0_100": 67.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 211,
          "lower": 1273.1313798416732,
          "upper": 1303.796303595353,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1270.1930620813807,
          "normalized_0_100": 65.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 225,
          "lower": 1262.9889091683544,
          "upper": 1277.397214994407,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-5-72b-instruct",
      "display_name": "qwen2.5-72b-instruct",
      "provider": "alibaba",
      "aliases": [
        "qwen2.5-72b-instruct"
      ],
      "openness": null,
      "license": "Qwen",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 65.54,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 65.54,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1271.4534211352895,
          "normalized_0_100": 65.82,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 198,
          "lower": 1260.6259084532808,
          "upper": 1282.2809338172983,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1292.9631733793744,
          "normalized_0_100": 68.38,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 206,
          "lower": 1285.3909471943343,
          "upper": 1300.535399564415,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1269.067032985252,
          "normalized_0_100": 65.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 226,
          "lower": 1265.0319014292695,
          "upper": 1273.1021645412345,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-flash-lite-001",
      "display_name": "gemini-2.0-flash-lite-001",
      "provider": null,
      "aliases": [
        "gemini-2.0-flash-lite-001"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 65.42,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 65.42,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.515936,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.65424,
          "normalized_0_100": 65.42,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.28984,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-preview-2024-09-12",
      "display_name": "o1-preview-2024-09-12",
      "provider": null,
      "aliases": [
        "o1-preview-2024-09-12"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 65.36,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 65.36,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.862745,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.653595,
          "normalized_0_100": 65.36,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.588235,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "athene-70b-0725",
      "display_name": "athene-70b-0725",
      "provider": null,
      "aliases": [
        "athene-70b-0725"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 65.08,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 65.08,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1236.5862929584796,
          "normalized_0_100": 61.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 222,
          "lower": 1222.0687251144486,
          "upper": 1251.1038608025106,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1267.4762831679482,
          "normalized_0_100": 65.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 229,
          "lower": 1256.6913316307923,
          "upper": 1278.261234705104,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1265.1855726346648,
          "normalized_0_100": 65.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 229,
          "lower": 1259.5513620102238,
          "upper": 1270.8197832591059,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-large-vision",
      "display_name": "hunyuan-large-vision",
      "provider": "tencent",
      "aliases": [
        "hunyuan-large-vision"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 64.86,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 64.86,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1274.1572620723414,
          "normalized_0_100": 66.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 194,
          "lower": 1236.0168697867266,
          "upper": 1312.2976543579564,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1306.4061666883067,
          "normalized_0_100": 69.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 199,
          "lower": 1287.3882502023382,
          "upper": 1325.4240831742752,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1263.381218463869,
          "normalized_0_100": 64.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 231,
          "lower": 1254.1769236414136,
          "upper": 1272.5855132863242,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-70b-instruct",
      "display_name": "llama-3.1-70b-instruct",
      "provider": "meta",
      "aliases": [
        "llama-3.1-70b-instruct"
      ],
      "openness": null,
      "license": "Llama 3.1 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 64.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 64.57,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1214.2961099491545,
          "normalized_0_100": 59.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 238,
          "lower": 1204.6377059097567,
          "upper": 1223.9545139885522,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1260.195370886955,
          "normalized_0_100": 64.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 233,
          "lower": 1253.1245275569922,
          "upper": 1267.266214216918,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1260.956515692058,
          "normalized_0_100": 64.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 234,
          "lower": 1257.267761294451,
          "upper": 1264.6452700896648,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-2024-12-17-high",
      "display_name": "o1-2024-12-17-high",
      "provider": null,
      "aliases": [
        "o1-2024-12-17-high"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 64.49,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 64.49,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.636127,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.644853,
          "normalized_0_100": 64.49,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.393545,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-ai-deepseek-v3-2",
      "display_name": "deepseek-ai/DeepSeek-V3.2",
      "provider": "deepseek-ai",
      "aliases": [
        "deepseek-ai/DeepSeek-V3.2"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 685.4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 64.28,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 64.28,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 94.17,
          "normalized_0_100": 94.17,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "evasion_bench",
          "value": 66.88,
          "normalized_0_100": 66.88,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 82.4,
          "normalized_0_100": 82.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 40.8,
          "normalized_0_100": 40.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 84.09,
          "normalized_0_100": 84.09,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 85,
          "normalized_0_100": 85,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 64.28,
          "normalized_0_100": 64.28,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 15.56,
          "normalized_0_100": 15.56,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 70,
          "normalized_0_100": 70,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 39.6,
          "normalized_0_100": 39.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zai-org-glm-4-7",
      "display_name": "zai-org/GLM-4.7",
      "provider": "zai-org",
      "aliases": [
        "zai-org/GLM-4.7"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 358.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 64.15,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 64.15,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "evasion_bench",
          "value": 82.91,
          "normalized_0_100": 82.91,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 85.7,
          "normalized_0_100": 85.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 24.8,
          "normalized_0_100": 24.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 84.3,
          "normalized_0_100": 84.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 64.15,
          "normalized_0_100": 64.15,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 73.8,
          "normalized_0_100": 73.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 33.4,
          "normalized_0_100": 33.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-122b-a10b",
      "display_name": "Qwen/Qwen3.5-122B-A10B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-122B-A10B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 125.1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 64,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 64,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 86.6,
          "normalized_0_100": 86.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 25.3,
          "normalized_0_100": 25.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 86.7,
          "normalized_0_100": 86.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 64,
          "normalized_0_100": 64,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 72,
          "normalized_0_100": 72,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 49.4,
          "normalized_0_100": 49.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-tulu-3-70b",
      "display_name": "llama-3.1-tulu-3-70b",
      "provider": "allenai",
      "aliases": [
        "llama-3.1-tulu-3-70b"
      ],
      "openness": null,
      "license": "Llama 3.1",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 63.96,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 63.96,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1248.3233543773272,
          "normalized_0_100": 63.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 209,
          "lower": 1212.5086694172012,
          "upper": 1284.138039337453,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1235.9223832050368,
          "normalized_0_100": 61.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 242,
          "lower": 1212.1199296653635,
          "upper": 1259.72483674471,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1255.7803466814785,
          "normalized_0_100": 63.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 236,
          "lower": 1245.3082173032508,
          "upper": 1266.2524760597064,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "magistral-medium-2506",
      "display_name": "magistral-medium-2506",
      "provider": "mistral",
      "aliases": [
        "magistral-medium-2506"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 63.79,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 63.79,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1224.4076842539212,
          "normalized_0_100": 60.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 230,
          "lower": 1196.94184532805,
          "upper": 1251.8735231797918,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1319.4869542745278,
          "normalized_0_100": 71.53,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 187,
          "lower": 1307.0708575025656,
          "upper": 1331.9030510464897,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1254.3741760222415,
          "normalized_0_100": 63.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 238,
          "lower": 1247.952934853342,
          "upper": 1260.7954171911408,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "reka-core-20240904",
      "display_name": "reka-core-20240904",
      "provider": null,
      "aliases": [
        "reka-core-20240904"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 63.08,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 63.08,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1240.1030179901536,
          "normalized_0_100": 62.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 218,
          "lower": 1217.1293519735939,
          "upper": 1263.0766840067135,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1230.5452120075824,
          "normalized_0_100": 60.96,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 246,
          "lower": 1215.1323874334294,
          "upper": 1245.9580365817355,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1248.4253064699801,
          "normalized_0_100": 63.08,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 239,
          "lower": 1241.3192210056227,
          "upper": 1255.5313919343375,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nvidia-nemotron-3-super-120b-a12b-bf16",
      "display_name": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
      "provider": "nvidia",
      "aliases": [
        "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 123.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 62.97,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 62.97,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "aime2026",
          "value": 90,
          "normalized_0_100": 90,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 79.23,
          "normalized_0_100": 79.23,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 18.26,
          "normalized_0_100": 18.26,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hmmt2026",
          "value": 84.85,
          "normalized_0_100": 84.85,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 83.73,
          "normalized_0_100": 83.73,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 62.97,
          "normalized_0_100": 62.97,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 53.73,
          "normalized_0_100": 53.73,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 31,
          "normalized_0_100": 31,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gair-openswe-32b",
      "display_name": "GAIR/OpenSWE-32B",
      "provider": "GAIR",
      "aliases": [
        "GAIR/OpenSWE-32B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 32.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 62.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 62.4,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 62.4,
          "normalized_0_100": 62.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 62.4,
          "normalized_0_100": 62.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ibm-granite-h-small",
      "display_name": "ibm-granite-h-small",
      "provider": "ibm",
      "aliases": [
        "ibm-granite-h-small"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 62.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 62.21,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1246.2802026483441,
          "normalized_0_100": 62.83,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 211,
          "lower": 1205.798260560205,
          "upper": 1286.762144736483,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1248.872414453525,
          "normalized_0_100": 63.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 238,
          "lower": 1231.5592665816976,
          "upper": 1266.185562325352,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1241.0971417915237,
          "normalized_0_100": 62.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 240,
          "lower": 1232.6547576824478,
          "upper": 1249.5395259005995,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "sonar",
      "display_name": "sonar",
      "provider": null,
      "aliases": [
        "sonar"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 62.18,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 62.18,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.176471,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.621849,
          "normalized_0_100": 62.18,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-3",
      "display_name": "grok-3",
      "provider": null,
      "aliases": [
        "grok-3"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 61.76,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 61.76,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.735294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.617647,
          "normalized_0_100": 61.76,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "jamba-1-5-large",
      "display_name": "jamba-1.5-large",
      "provider": null,
      "aliases": [
        "jamba-1.5-large"
      ],
      "openness": null,
      "license": "Jamba Open",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 61.73,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 61.73,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1202.079097371513,
          "normalized_0_100": 57.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 246,
          "lower": 1180.324199588873,
          "upper": 1223.833995154153,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1228.0386777468398,
          "normalized_0_100": 60.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 248,
          "lower": 1213.6217175535044,
          "upper": 1242.455637940175,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1237.069350786467,
          "normalized_0_100": 61.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 242,
          "lower": 1229.7829436300149,
          "upper": 1244.3557579429191,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-3-beta",
      "display_name": "grok-3-beta",
      "provider": null,
      "aliases": [
        "grok-3-beta"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 61.52,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 61.52,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.647059,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.615196,
          "normalized_0_100": 61.52,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.588235,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-small-24b-instruct-2501",
      "display_name": "mistral-small-24b-instruct-2501",
      "provider": "mistral",
      "aliases": [
        "mistral-small-24b-instruct-2501"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 61.32,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 61.32,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1203.577781777206,
          "normalized_0_100": 57.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 245,
          "lower": 1185.98168836539,
          "upper": 1221.1738751890216,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1246.2427382864494,
          "normalized_0_100": 62.82,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 239,
          "lower": 1233.7951149011105,
          "upper": 1258.6903616717882,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1233.54986199335,
          "normalized_0_100": 61.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 243,
          "lower": 1227.6932787930036,
          "upper": 1239.406445193696,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "chatgpt-4o-latest-2025-03-27",
      "display_name": "chatgpt-4o-latest-2025-03-27",
      "provider": null,
      "aliases": [
        "chatgpt-4o-latest-2025-03-27"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 60.71,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 60.71,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.632727,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.607071,
          "normalized_0_100": 60.71,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.163636,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-nemotron-51b-instruct",
      "display_name": "llama-3.1-nemotron-51b-instruct",
      "provider": "nvidia",
      "aliases": [
        "llama-3.1-nemotron-51b-instruct"
      ],
      "openness": null,
      "license": "Llama 3.1",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.66,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.66,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1178.605983545137,
          "normalized_0_100": 54.78,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 258,
          "lower": 1150.6863885633645,
          "upper": 1206.5255785269096,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1223.449719209464,
          "normalized_0_100": 60.11,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 249,
          "lower": 1202.1729517423678,
          "upper": 1244.7264866765604,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1228.027826413756,
          "normalized_0_100": 60.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 248,
          "lower": 1218.0814139176796,
          "upper": 1237.9742389098324,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-2-9b-it-simpo",
      "display_name": "gemma-2-9b-it-simpo",
      "provider": null,
      "aliases": [
        "gemma-2-9b-it-simpo"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.55,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.55,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1224.1585252877626,
          "normalized_0_100": 60.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 232,
          "lower": 1203.3570225319945,
          "upper": 1244.960028043531,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1191.404705014518,
          "normalized_0_100": 56.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 263,
          "lower": 1176.6195748611956,
          "upper": 1206.1898351678406,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1227.1093126022042,
          "normalized_0_100": 60.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 249,
          "lower": 1220.2656271810793,
          "upper": 1233.9529980233294,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "glm-4-0520",
      "display_name": "glm-4-0520",
      "provider": "zai",
      "aliases": [
        "glm-4-0520"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.41,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.41,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1227.573619855128,
          "normalized_0_100": 60.6,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 227,
          "lower": 1210.767859529137,
          "upper": 1244.379380181119,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1228.144673105038,
          "normalized_0_100": 60.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 247,
          "lower": 1214.1933626174418,
          "upper": 1242.095983592634,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1225.9528554245562,
          "normalized_0_100": 60.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 250,
          "lower": 1219.016852823982,
          "upper": 1232.8888580251303,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nemotron-4-340b-instruct",
      "display_name": "nemotron-4-340b-instruct",
      "provider": "nvidia",
      "aliases": [
        "nemotron-4-340b-instruct"
      ],
      "openness": null,
      "license": "NVIDIA Open Model",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.28,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.28,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1212.84164371683,
          "normalized_0_100": 58.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 239,
          "lower": 1199.7077247305003,
          "upper": 1225.97556270316,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1209.7672898901958,
          "normalized_0_100": 58.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 255,
          "lower": 1198.489537029077,
          "upper": 1221.0450427513147,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1224.8581327177626,
          "normalized_0_100": 60.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 252,
          "lower": 1219.5756942743556,
          "upper": 1230.1405711611696,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "c4ai-aya-expanse-32b",
      "display_name": "c4ai-aya-expanse-32b",
      "provider": "cohere",
      "aliases": [
        "c4ai-aya-expanse-32b"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 60.19,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 60.19,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1209.6362232602983,
          "normalized_0_100": 58.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 242,
          "lower": 1197.3239696989378,
          "upper": 1221.9484768216587,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1197.0975022414887,
          "normalized_0_100": 56.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 260,
          "lower": 1188.0413155478566,
          "upper": 1206.1536889351207,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1224.059114851398,
          "normalized_0_100": 60.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 253,
          "lower": 1219.2230495587096,
          "upper": 1228.8951801440862,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-coder",
      "display_name": "deepseek-coder",
      "provider": null,
      "aliases": [
        "deepseek-coder"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 60,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 60,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.6,
          "normalized_0_100": 60,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "moonshotai-kimi-k2-thinking",
      "display_name": "moonshotai/Kimi-K2-Thinking",
      "provider": "moonshotai",
      "aliases": [
        "moonshotai/Kimi-K2-Thinking"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1058.1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 60,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 60,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 84.5,
          "normalized_0_100": 84.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 23.9,
          "normalized_0_100": 23.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 84.6,
          "normalized_0_100": 84.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 60,
          "normalized_0_100": 60,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 71.3,
          "normalized_0_100": 71.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 35.7,
          "normalized_0_100": 35.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-70b-instruct",
      "display_name": "llama-3-70b-instruct",
      "provider": "meta",
      "aliases": [
        "llama-3-70b-instruct"
      ],
      "openness": null,
      "license": "Llama 3 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 59.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 59.8,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1113.2745830506203,
          "normalized_0_100": 47.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 278,
          "lower": 1104.3495864778706,
          "upper": 1122.19957962337,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1206.642617181662,
          "normalized_0_100": 58.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 257,
          "lower": 1199.7005739021683,
          "upper": 1213.5846604611556,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1220.8415360600047,
          "normalized_0_100": 59.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 254,
          "lower": 1217.2671971041339,
          "upper": 1224.4158750158758,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-2024-12-17-low",
      "display_name": "o1-2024-12-17-low",
      "provider": null,
      "aliases": [
        "o1-2024-12-17-low"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 59.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 59.8,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.529412,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.598039,
          "normalized_0_100": 59.8,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.058824,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "dracarys2-llama-3-1-70b-instruct",
      "display_name": "dracarys2-llama-3.1-70b-instruct",
      "provider": null,
      "aliases": [
        "dracarys2-llama-3.1-70b-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 59.7,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 59.7,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.545455,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.59697,
          "normalized_0_100": 59.7,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.181818,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "olmo-2-0325-32b-instruct",
      "display_name": "olmo-2-0325-32b-instruct",
      "provider": "allenai",
      "aliases": [
        "olmo-2-0325-32b-instruct"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 59.46,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 59.46,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1191.1860449645214,
          "normalized_0_100": 56.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 250,
          "lower": 1155.0220108067772,
          "upper": 1227.350079122266,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1210.6309570901153,
          "normalized_0_100": 58.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 254,
          "lower": 1183.1918996458883,
          "upper": 1238.0700145343424,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1217.9279824063958,
          "normalized_0_100": 59.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 257,
          "lower": 1207.135155478512,
          "upper": 1228.7208093342797,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "reka-flash-20240904",
      "display_name": "reka-flash-20240904",
      "provider": null,
      "aliases": [
        "reka-flash-20240904"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 59.46,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 59.46,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1219.7910609235423,
          "normalized_0_100": 59.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 235,
          "lower": 1196.6793692970514,
          "upper": 1242.9027525500333,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1190.489752064417,
          "normalized_0_100": 56.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 264,
          "lower": 1175.287561498897,
          "upper": 1205.691942629937,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1217.964467030516,
          "normalized_0_100": 59.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 256,
          "lower": 1211.0353916703812,
          "upper": 1224.8935423906507,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "chatgpt-4o-latest-2025-01-29",
      "display_name": "chatgpt-4o-latest-2025-01-29",
      "provider": null,
      "aliases": [
        "chatgpt-4o-latest-2025-01-29"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 59.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 59.1,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.434175,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.590957,
          "normalized_0_100": 59.1,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.36835,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimaxai-minimax-m2-5",
      "display_name": "MiniMaxAI/MiniMax-M2.5",
      "provider": "MiniMaxAI",
      "aliases": [
        "MiniMaxAI/MiniMax-M2.5"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 228.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 58.95,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 58.95,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 85.2,
          "normalized_0_100": 85.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 19.4,
          "normalized_0_100": 19.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 58.95,
          "normalized_0_100": 58.95,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 55.4,
          "normalized_0_100": 55.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 75.8,
          "normalized_0_100": 75.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "lgai-exaone-k-exaone-236b-a23b",
      "display_name": "LGAI-EXAONE/K-EXAONE-236B-A23B",
      "provider": "LGAI-EXAONE",
      "aliases": [
        "LGAI-EXAONE/K-EXAONE-236B-A23B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 237.1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 58.83,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 58.83,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 79.1,
          "normalized_0_100": 79.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 13.6,
          "normalized_0_100": 13.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 83.8,
          "normalized_0_100": 83.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 58.83,
          "normalized_0_100": 58.83,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-flash-thinking-exp-1219",
      "display_name": "gemini-2.0-flash-thinking-exp-1219",
      "provider": null,
      "aliases": [
        "gemini-2.0-flash-thinking-exp-1219"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 58.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 58.21,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.534973,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.582124,
          "normalized_0_100": 58.21,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.337433,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4-0314",
      "display_name": "gpt-4-0314",
      "provider": "openai",
      "aliases": [
        "gpt-4-0314"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 58.05,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 58.05,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1182.9752739707974,
          "normalized_0_100": 55.3,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 255,
          "lower": 1171.3492008468138,
          "upper": 1194.601347094781,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1209.200621341391,
          "normalized_0_100": 58.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 256,
          "lower": 1199.857709543603,
          "upper": 1218.5435331391793,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1206.0525850723452,
          "normalized_0_100": 58.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 261,
          "lower": 1201.3035147194735,
          "upper": 1210.8016554252167,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nemotron-cascade-2-30b-a3b",
      "display_name": "nvidia/Nemotron-Cascade-2-30B-A3B",
      "provider": "nvidia",
      "aliases": [
        "nvidia/Nemotron-Cascade-2-30B-A3B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 31.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 57.87,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 57.87,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 76.1,
          "normalized_0_100": 76.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 17.7,
          "normalized_0_100": 17.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 79.8,
          "normalized_0_100": 79.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 57.87,
          "normalized_0_100": 57.87,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-pro-exp-0801",
      "display_name": "gemini-1.5-pro-exp-0801",
      "provider": null,
      "aliases": [
        "gemini-1.5-pro-exp-0801"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 57.84,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 57.84,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.823529,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.578431,
          "normalized_0_100": 57.84,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "command-r-plus",
      "display_name": "command-r-plus",
      "provider": "cohere",
      "aliases": [
        "command-r-plus"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 57.79,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 57.79,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1190.0208810037511,
          "normalized_0_100": 56.14,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 251,
          "lower": 1180.4109989130613,
          "upper": 1199.6307630944407,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1172.3251845436457,
          "normalized_0_100": 54.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 273,
          "lower": 1164.6485128889703,
          "upper": 1180.001856198321,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1203.8753497146824,
          "normalized_0_100": 57.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 262,
          "lower": 1199.5881004073476,
          "upper": 1208.1625990220173,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-standard-256k",
      "display_name": "hunyuan-standard-256k",
      "provider": "tencent",
      "aliases": [
        "hunyuan-standard-256k"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 57.56,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 57.56,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1240.5769469675956,
          "normalized_0_100": 62.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 217,
          "lower": 1206.5042023992983,
          "upper": 1274.649691535893,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1231.2305068362075,
          "normalized_0_100": 61.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 244,
          "lower": 1206.8261669666133,
          "upper": 1255.6348467058017,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1201.9633390262707,
          "normalized_0_100": 57.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 264,
          "lower": 1190.3697554456428,
          "upper": 1213.5569226068988,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "chatgpt-4o-latest-2025-01-30",
      "display_name": "chatgpt-4o-latest-2025-01-30",
      "provider": null,
      "aliases": [
        "chatgpt-4o-latest-2025-01-30"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 57.5,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 57.5,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.575,
          "normalized_0_100": 57.5,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-tulu-3-8b",
      "display_name": "llama-3.1-tulu-3-8b",
      "provider": "allenai",
      "aliases": [
        "llama-3.1-tulu-3-8b"
      ],
      "openness": null,
      "license": "Llama 3.1",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 56.54,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 56.54,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1174.7155382896149,
          "normalized_0_100": 54.32,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 260,
          "lower": 1138.6282271989671,
          "upper": 1210.8028493802626,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1183.9561308075456,
          "normalized_0_100": 55.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 269,
          "lower": 1159.5324584436999,
          "upper": 1208.3798031713914,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1193.3629290287465,
          "normalized_0_100": 56.54,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 266,
          "lower": 1182.8459780225662,
          "upper": 1203.8798800349268,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-coder-v2",
      "display_name": "deepseek-coder-v2",
      "provider": "deepseek",
      "aliases": [
        "deepseek-coder-v2"
      ],
      "openness": null,
      "license": "DeepSeek License",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 56.28,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 56.28,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1199.9009674392373,
          "normalized_0_100": 57.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 247,
          "lower": 1184.9321700415103,
          "upper": 1214.8697648369644,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1251.4881056132867,
          "normalized_0_100": 63.45,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 236,
          "lower": 1239.5860898381197,
          "upper": 1263.390121388454,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1191.2354997060995,
          "normalized_0_100": 56.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 267,
          "lower": 1184.8894605849528,
          "upper": 1197.5815388272463,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "ministral-8b-2410",
      "display_name": "ministral-8b-2410",
      "provider": "mistral",
      "aliases": [
        "ministral-8b-2410"
      ],
      "openness": null,
      "license": "MRL",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 56.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 56.25,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1192.4159509487672,
          "normalized_0_100": 56.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 249,
          "lower": 1167.257397362873,
          "upper": 1217.5745045346612,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1202.4060203323352,
          "normalized_0_100": 57.61,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 258,
          "lower": 1183.8050974262528,
          "upper": 1221.0069432384175,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1190.9543540929026,
          "normalized_0_100": 56.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 268,
          "lower": 1181.9664520770216,
          "upper": 1199.9422561087836,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-3-mini-reasoning-beta",
      "display_name": "grok-3-mini-reasoning-beta",
      "provider": null,
      "aliases": [
        "grok-3-mini-reasoning-beta"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 56.08,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 56.08,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.235294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.560784,
          "normalized_0_100": 56.08,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-r1-distill-qwen-32b",
      "display_name": "deepseek-r1-distill-qwen-32b",
      "provider": null,
      "aliases": [
        "DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-r1-distill-qwen-32b"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 55.91,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 55.91,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.513636,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.559091,
          "normalized_0_100": 55.91,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.098413,
          "normalized_0_100": 9.84,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.054545,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "jamba-1-5-mini",
      "display_name": "jamba-1.5-mini",
      "provider": null,
      "aliases": [
        "jamba-1.5-mini"
      ],
      "openness": null,
      "license": "Jamba Open",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 55.75,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 55.75,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1134.6458863040466,
          "normalized_0_100": 49.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 271,
          "lower": 1113.2691353808975,
          "upper": 1156.0226372271957,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1178.8050708142023,
          "normalized_0_100": 54.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 271,
          "lower": 1163.6160842160975,
          "upper": 1193.9940574123066,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1186.7208845848145,
          "normalized_0_100": 55.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 270,
          "lower": 1179.5543773281406,
          "upper": 1193.8873918414886,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-8b-instruct",
      "display_name": "llama-3.1-8b-instruct",
      "provider": "meta",
      "aliases": [
        "llama-3.1-8b-instruct"
      ],
      "openness": null,
      "license": "Llama 3.1 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 55.74,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 55.74,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1149.8255484954814,
          "normalized_0_100": 51.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 263,
          "lower": 1139.531270599231,
          "upper": 1160.1198263917317,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1195.3186785313185,
          "normalized_0_100": 56.77,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 262,
          "lower": 1187.9215463989722,
          "upper": 1202.7158106636646,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1186.6246654723261,
          "normalized_0_100": 55.74,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 271,
          "lower": 1182.5454989596265,
          "upper": 1190.7038319850258,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "c4ai-aya-expanse-8b",
      "display_name": "c4ai-aya-expanse-8b",
      "provider": "cohere",
      "aliases": [
        "c4ai-aya-expanse-8b"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 55.56,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 55.56,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1180.0487965553607,
          "normalized_0_100": 54.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 257,
          "lower": 1156.7777499428196,
          "upper": 1203.319843167902,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1161.0378461769828,
          "normalized_0_100": 52.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 280,
          "lower": 1146.5916964625105,
          "upper": 1175.483995891455,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1185.1635844748625,
          "normalized_0_100": 55.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 273,
          "lower": 1178.2549880240088,
          "upper": 1192.0721809257166,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-exp-1114",
      "display_name": "gemini-exp-1114",
      "provider": null,
      "aliases": [
        "gemini-exp-1114"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 55.55,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 55.55,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.529412,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.555462,
          "normalized_0_100": 55.55,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.588235,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-2b",
      "display_name": "Qwen/Qwen3.5-2B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-2B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 2.3,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 55.3,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 55.3,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 55.3,
          "normalized_0_100": 55.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 55.3,
          "normalized_0_100": 55.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-pro-exp-0827",
      "display_name": "gemini-1.5-pro-exp-0827",
      "provider": null,
      "aliases": [
        "gemini-1.5-pro-exp-0827"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 54.45,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 54.45,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.7,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.803922,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.544538,
          "normalized_0_100": 54.45,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.411765,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-llama-3-1-8b-instruct",
      "display_name": "meta-llama/Llama-3.1-8B-Instruct",
      "provider": "meta-llama",
      "aliases": [
        "meta-llama/Llama-3.1-8B-Instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 54.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 54.4,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 30.4,
          "normalized_0_100": 30.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 84.5,
          "normalized_0_100": 84.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 48.3,
          "normalized_0_100": 48.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 54.4,
          "normalized_0_100": 54.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "yi-1-5-34b-chat",
      "display_name": "yi-1.5-34b-chat",
      "provider": null,
      "aliases": [
        "yi-1.5-34b-chat"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 54.12,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 54.12,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1212.0644210068256,
          "normalized_0_100": 58.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 240,
          "lower": 1199.371444827383,
          "upper": 1224.7573971862682,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1169.7955534319517,
          "normalized_0_100": 53.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 275,
          "lower": 1159.3710432897242,
          "upper": 1180.2200635741792,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1173.0021691707389,
          "normalized_0_100": 54.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 276,
          "lower": 1167.9968727057935,
          "upper": 1178.0074656356842,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "facebook-cwm",
      "display_name": "facebook/cwm",
      "provider": "facebook",
      "aliases": [
        "facebook/cwm"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 32.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 53.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 53.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 53.9,
          "normalized_0_100": 53.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 53.9,
          "normalized_0_100": 53.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "reka-flash-21b-20240226-online",
      "display_name": "reka-flash-21b-20240226-online",
      "provider": null,
      "aliases": [
        "reka-flash-21b-20240226-online"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 53.81,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 53.81,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1142.2527486102695,
          "normalized_0_100": 50.46,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 267,
          "lower": 1127.1471399678458,
          "upper": 1157.3583572526932,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1165.0171708110852,
          "normalized_0_100": 53.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 278,
          "lower": 1152.2718681651506,
          "upper": 1177.76247345702,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1170.4764694956098,
          "normalized_0_100": 53.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 277,
          "lower": 1163.154723526283,
          "upper": 1177.7982154649364,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimaxai-minimax-m2-1",
      "display_name": "MiniMaxAI/MiniMax-M2.1",
      "provider": "MiniMaxAI",
      "aliases": [
        "MiniMaxAI/MiniMax-M2.1"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 228.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 53.59,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 53.59,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "evasion_bench",
          "value": 71.31,
          "normalized_0_100": 71.31,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 22.2,
          "normalized_0_100": 22.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 88,
          "normalized_0_100": 88,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 53.59,
          "normalized_0_100": 53.59,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 36.81,
          "normalized_0_100": 36.81,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 74,
          "normalized_0_100": 74,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 29.2,
          "normalized_0_100": 29.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-8b-instruct",
      "display_name": "llama-3-8b-instruct",
      "provider": "meta",
      "aliases": [
        "llama-3-8b-instruct"
      ],
      "openness": null,
      "license": "Llama 3 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 53.24,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 53.24,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1075.1007441063794,
          "normalized_0_100": 42.47,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 287,
          "lower": 1065.5138948873464,
          "upper": 1084.6875933254123,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1152.1171231302637,
          "normalized_0_100": 51.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 285,
          "lower": 1144.5810407058586,
          "upper": 1159.6532055546688,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1165.647828499389,
          "normalized_0_100": 53.24,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 279,
          "lower": 1161.9353567279181,
          "upper": 1169.36030027086,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-medium",
      "display_name": "mistral-medium",
      "provider": "mistral",
      "aliases": [
        "mistral-medium"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 53.19,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 53.19,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1108.1254185102603,
          "normalized_0_100": 46.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 280,
          "lower": 1094.6917751530877,
          "upper": 1121.5590618674332,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1162.5717816796518,
          "normalized_0_100": 52.87,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 279,
          "lower": 1152.1881128077966,
          "upper": 1172.9554505515068,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1165.1819055400797,
          "normalized_0_100": 53.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 280,
          "lower": 1159.7450499160368,
          "upper": 1170.6187611641228,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "redhatai-nvidia-nemotron-3-super-120b-a12b-bf16",
      "display_name": "RedHatAI/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
      "provider": "RedHatAI",
      "aliases": [
        "RedHatAI/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 123.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 53.19,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 53.19,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 79.23,
          "normalized_0_100": 79.23,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 18.26,
          "normalized_0_100": 18.26,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 83.73,
          "normalized_0_100": 83.73,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 53.19,
          "normalized_0_100": 53.19,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 53.73,
          "normalized_0_100": 53.73,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 31,
          "normalized_0_100": 31,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "reka-flash-21b-20240226",
      "display_name": "reka-flash-21b-20240226",
      "provider": null,
      "aliases": [
        "reka-flash-21b-20240226"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 53.17,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 53.17,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1134.3803975678647,
          "normalized_0_100": 49.52,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 272,
          "lower": 1121.002074667615,
          "upper": 1147.7587204681145,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1154.1760761652126,
          "normalized_0_100": 51.88,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 284,
          "lower": 1143.5395428850984,
          "upper": 1164.8126094453266,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1165.0321005773962,
          "normalized_0_100": 53.17,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 281,
          "lower": 1159.1302665828682,
          "upper": 1170.9339345719245,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "command-r",
      "display_name": "command-r",
      "provider": "cohere",
      "aliases": [
        "command-r"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 52.95,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 52.95,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1145.483699947852,
          "normalized_0_100": 50.84,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 265,
          "lower": 1134.8924442107232,
          "upper": 1156.0749556849812,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1128.198338529275,
          "normalized_0_100": 48.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 293,
          "lower": 1119.5489592583676,
          "upper": 1136.8477178001822,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1163.1784534602753,
          "normalized_0_100": 52.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 282,
          "lower": 1158.4431213684668,
          "upper": 1167.9137855520835,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-math-72b-instruct",
      "display_name": "qwen2-math-72b-instruct",
      "provider": null,
      "aliases": [
        "qwen2-math-72b-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 52.94,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 52.94,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.058824,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.529412,
          "normalized_0_100": 52.94,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.176471,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meituan-longcat-longcat-flash-thinking-2601",
      "display_name": "meituan-longcat/LongCat-Flash-Thinking-2601",
      "provider": "meituan-longcat",
      "aliases": [
        "meituan-longcat/LongCat-Flash-Thinking-2601"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 561.9,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 52.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 52.85,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 80.5,
          "normalized_0_100": 80.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 25.2,
          "normalized_0_100": 25.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 52.85,
          "normalized_0_100": 52.85,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-beta",
      "display_name": "grok-beta",
      "provider": null,
      "aliases": [
        "grok-beta"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 52.7,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 52.7,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.422193,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.526967,
          "normalized_0_100": 52.7,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.344385,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "swe-lego-swe-lego-qwen3-32b",
      "display_name": "SWE-Lego/SWE-Lego-Qwen3-32B",
      "provider": "SWE-Lego",
      "aliases": [
        "SWE-Lego/SWE-Lego-Qwen3-32B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 0,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 52.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 52.6,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 52.6,
          "normalized_0_100": 52.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 52.6,
          "normalized_0_100": 52.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "internlm2-5-20b-chat",
      "display_name": "internlm2_5-20b-chat",
      "provider": null,
      "aliases": [
        "internlm2_5-20b-chat"
      ],
      "openness": null,
      "license": "Other",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 52.39,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 52.39,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1198.5108615630281,
          "normalized_0_100": 57.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 248,
          "lower": 1179.8864419967094,
          "upper": 1217.1352811293468,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1159.5313847361567,
          "normalized_0_100": 52.51,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 281,
          "lower": 1145.4651805321582,
          "upper": 1173.597588940155,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1158.4887272509177,
          "normalized_0_100": 52.39,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 285,
          "lower": 1151.433489203495,
          "upper": 1165.5439652983405,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-3-70b-instruct",
      "display_name": "Meta-Llama-3-70B-Instruct",
      "provider": null,
      "aliases": [
        "Meta-Llama-3-70B-Instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 52.26,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 52.26,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.6875,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.619048,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.522619,
          "normalized_0_100": 52.26,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.238095,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.6875,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-2-mini",
      "display_name": "grok-2-mini",
      "provider": null,
      "aliases": [
        "grok-2-mini"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 52.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 52.1,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.411765,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.521008,
          "normalized_0_100": 52.1,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.647059,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-2-2b-it",
      "display_name": "gemma-2-2b-it",
      "provider": "google",
      "aliases": [
        "gemma-2-2b-it"
      ],
      "openness": null,
      "license": "Gemma license",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 52.07,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 52.07,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1131.228939681339,
          "normalized_0_100": 49.15,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 273,
          "lower": 1120.5320158636216,
          "upper": 1141.9258634990565,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1112.785295313392,
          "normalized_0_100": 46.95,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 299,
          "lower": 1104.915234577578,
          "upper": 1120.6553560492061,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1155.8129226393335,
          "normalized_0_100": 52.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 286,
          "lower": 1151.7947613411648,
          "upper": 1159.8310839375022,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-flash-thinking-exp-01-21",
      "display_name": "gemini-2.0-flash-thinking-exp-01-21",
      "provider": null,
      "aliases": [
        "gemini-2.0-flash-thinking-exp-01-21"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 51.98,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 51.98,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.327701,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.519786,
          "normalized_0_100": 51.98,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.319251,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-flash-8b-exp-0827",
      "display_name": "gemini-1.5-flash-8b-exp-0827",
      "provider": null,
      "aliases": [
        "gemini-1.5-flash-8b-exp-0827"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 51.93,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 51.93,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.7,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.745098,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.519328,
          "normalized_0_100": 51.93,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.235294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "granite-3-1-8b-instruct",
      "display_name": "granite-3.1-8b-instruct",
      "provider": "ibm",
      "aliases": [
        "granite-3.1-8b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 51.33,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 51.33,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1143.9728988599043,
          "normalized_0_100": 50.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 266,
          "lower": 1104.6948569987467,
          "upper": 1183.2509407210619,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1186.8638418282458,
          "normalized_0_100": 55.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 267,
          "lower": 1161.3207870804715,
          "upper": 1212.4068965760198,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1149.5818456545687,
          "normalized_0_100": 51.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 287,
          "lower": 1138.6319409534071,
          "upper": 1160.53175035573,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-pro-dev-api",
      "display_name": "gemini-pro-dev-api",
      "provider": "google",
      "aliases": [
        "gemini-pro-dev-api"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 51.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 51.25,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1123.0944802869808,
          "normalized_0_100": 48.18,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 275,
          "lower": 1105.8278804911743,
          "upper": 1140.3610800827873,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1104.5042524507985,
          "normalized_0_100": 45.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 303,
          "lower": 1090.7180892555616,
          "upper": 1118.2904156460359,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1148.8915823934176,
          "normalized_0_100": 51.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 288,
          "lower": 1141.6766343435702,
          "upper": 1156.106530443265,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zephyr-orpo-141b-a35b-v0-1",
      "display_name": "zephyr-orpo-141b-A35b-v0.1",
      "provider": null,
      "aliases": [
        "zephyr-orpo-141b-A35b-v0.1"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 50.64,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 50.64,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1081.038678286472,
          "normalized_0_100": 43.18,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 284,
          "lower": 1057.8985616587067,
          "upper": 1104.1787949142372,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1129.7341363687367,
          "normalized_0_100": 48.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 292,
          "lower": 1109.238098908219,
          "upper": 1150.2301738292545,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1143.7358983077438,
          "normalized_0_100": 50.64,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 289,
          "lower": 1132.9869002409469,
          "upper": 1154.4848963745408,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "snurfyai-sn-logicer-0-8b",
      "display_name": "SnurfyAI/Sn-Logicer-0.8B",
      "provider": "SnurfyAI",
      "aliases": [
        "SnurfyAI/Sn-Logicer-0.8B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 0.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 50.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 50.57,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gsm8k",
          "value": 50.57,
          "normalized_0_100": 50.57,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 50.57,
          "normalized_0_100": 50.57,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-coder-next",
      "display_name": "Qwen/Qwen3-Coder-Next",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-Coder-Next"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 79.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 50.37,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 50.37,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 50.37,
          "normalized_0_100": 50.37,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 44.3,
          "normalized_0_100": 44.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 70.6,
          "normalized_0_100": 70.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 36.2,
          "normalized_0_100": 36.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "coding-meta-llama-3-1-70b-instruct-chk-50",
      "display_name": "coding-meta-llama-3.1-70b-instruct-chk-50",
      "provider": null,
      "aliases": [
        "coding-meta-llama-3.1-70b-instruct-chk-50"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 50,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 50,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.5,
          "normalized_0_100": 50,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-32b-chat",
      "display_name": "qwen1.5-32b-chat",
      "provider": "alibaba",
      "aliases": [
        "qwen1.5-32b-chat"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 49.86,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 49.86,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1175.5609990991713,
          "normalized_0_100": 54.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 259,
          "lower": 1162.3359032959038,
          "upper": 1188.7860949024391,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1155.1578617837847,
          "normalized_0_100": 51.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 283,
          "lower": 1144.2591809819987,
          "upper": 1166.0565425855707,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1137.2460135124177,
          "normalized_0_100": 49.86,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 291,
          "lower": 1131.1634788313672,
          "upper": 1143.3285481934681,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zai-org-glm-4-7-flash",
      "display_name": "zai-org/GLM-4.7-Flash",
      "provider": "zai-org",
      "aliases": [
        "zai-org/GLM-4.7-Flash"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 31.2,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 49.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 49.6,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 75.2,
          "normalized_0_100": 75.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 14.4,
          "normalized_0_100": 14.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 49.6,
          "normalized_0_100": 49.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 59.2,
          "normalized_0_100": 59.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-pro",
      "display_name": "gemini-pro",
      "provider": "google",
      "aliases": [
        "gemini-pro"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 49.07,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 49.07,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1095.4529320869901,
          "normalized_0_100": 44.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 282,
          "lower": 1058.1489377803055,
          "upper": 1132.7569263936748,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1108.4110206253065,
          "normalized_0_100": 46.43,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 301,
          "lower": 1085.1686803127022,
          "upper": 1131.6533609379107,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1130.5814333002343,
          "normalized_0_100": 49.07,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 294,
          "lower": 1119.1074762357225,
          "upper": 1142.055390364746,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "yi-34b-chat",
      "display_name": "yi-34b-chat",
      "provider": null,
      "aliases": [
        "yi-34b-chat"
      ],
      "openness": null,
      "license": "Yi License",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 48.82,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 48.82,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1174.6423980551072,
          "normalized_0_100": 54.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 261,
          "lower": 1156.599370439164,
          "upper": 1192.6854256710503,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1112.2777308320956,
          "normalized_0_100": 46.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 300,
          "lower": 1099.2847917130907,
          "upper": 1125.2706699511002,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1128.5134256538236,
          "normalized_0_100": 48.82,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 295,
          "lower": 1121.728687410549,
          "upper": 1135.2981638970982,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-14b-chat",
      "display_name": "qwen1.5-14b-chat",
      "provider": "alibaba",
      "aliases": [
        "qwen1.5-14b-chat"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 48.73,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 48.73,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1146.1095195037672,
          "normalized_0_100": 50.92,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 264,
          "lower": 1132.2805828500916,
          "upper": 1159.9384561574427,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1138.271363793634,
          "normalized_0_100": 49.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 288,
          "lower": 1125.497813883299,
          "upper": 1151.0449137039693,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1127.7367889145567,
          "normalized_0_100": 48.73,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 296,
          "lower": 1120.6820781571898,
          "upper": 1134.7914996719232,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "granite-3-1-2b-instruct",
      "display_name": "granite-3.1-2b-instruct",
      "provider": "ibm",
      "aliases": [
        "granite-3.1-2b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 48.7,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 48.7,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1137.4489084495478,
          "normalized_0_100": 49.89,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 269,
          "lower": 1099.4196862849476,
          "upper": 1175.4781306141479,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1149.7967263308801,
          "normalized_0_100": 51.36,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 286,
          "lower": 1125.2753984525405,
          "upper": 1174.31805420922,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1127.4637865994418,
          "normalized_0_100": 48.7,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 297,
          "lower": 1116.3972194712474,
          "upper": 1138.5303537276359,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "minimaxai-minimax-m2",
      "display_name": "MiniMaxAI/MiniMax-M2",
      "provider": "MiniMaxAI",
      "aliases": [
        "MiniMaxAI/MiniMax-M2"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 228.7,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 48.48,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 48.48,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 12.5,
          "normalized_0_100": 12.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 82,
          "normalized_0_100": 82,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 48.48,
          "normalized_0_100": 48.48,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 69.4,
          "normalized_0_100": 69.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 30,
          "normalized_0_100": 30,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nanbeige-nanbeige4-1-3b",
      "display_name": "Nanbeige/Nanbeige4.1-3B",
      "provider": "Nanbeige",
      "aliases": [
        "Nanbeige/Nanbeige4.1-3B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 3.9,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 48.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 48.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 83.8,
          "normalized_0_100": 83.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 12.6,
          "normalized_0_100": 12.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 48.2,
          "normalized_0_100": 48.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o3-mini-2025-01-31-high",
      "display_name": "o3-mini-2025-01-31-high",
      "provider": null,
      "aliases": [
        "o3-mini-2025-01-31-high"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 48.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 48.06,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.272902,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.480644,
          "normalized_0_100": 48.06,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.121503,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o3-mini-2025-01-31-medium",
      "display_name": "o3-mini-2025-01-31-medium",
      "provider": null,
      "aliases": [
        "o3-mini-2025-01-31-medium"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 48.01,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 48.01,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.272131,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.480093,
          "normalized_0_100": 48.01,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.120218,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "tulu-2-dpo-70b",
      "display_name": "tulu-2-dpo-70b",
      "provider": null,
      "aliases": [
        "tulu-2-dpo-70b"
      ],
      "openness": null,
      "license": "AI2 ImpACT Low-risk",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 47.94,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 47.94,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1012.5300892145187,
          "normalized_0_100": 35.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 311,
          "lower": 976.8265134425334,
          "upper": 1048.233664986504,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1116.6938622535429,
          "normalized_0_100": 47.42,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 295,
          "lower": 1095.6337847073307,
          "upper": 1137.7539397997552,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1121.0778115317564,
          "normalized_0_100": 47.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 299,
          "lower": 1111.3587673062868,
          "upper": 1130.7968557572262,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "wizardlm-70b",
      "display_name": "wizardlm-70b",
      "provider": "microsoft",
      "aliases": [
        "wizardlm-70b"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 47.76,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 47.76,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1051.459823558715,
          "normalized_0_100": 39.66,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 298,
          "lower": 1019.0890566747177,
          "upper": 1083.8305904427125,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1119.5634234746499,
          "normalized_0_100": 47.76,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 300,
          "lower": 1110.199645860941,
          "upper": 1128.9272010883587,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "dbrx-instruct-preview",
      "display_name": "dbrx-instruct-preview",
      "provider": null,
      "aliases": [
        "dbrx-instruct-preview"
      ],
      "openness": null,
      "license": "DBRX LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 47.68,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 47.68,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1066.865239487479,
          "normalized_0_100": 41.49,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 291,
          "lower": 1053.9193960727403,
          "upper": 1079.8110829022178,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1132.271460003591,
          "normalized_0_100": 49.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 290,
          "lower": 1121.3823578520019,
          "upper": 1143.1605621551803,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1118.9117856346422,
          "normalized_0_100": 47.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 301,
          "lower": 1112.8515875228295,
          "upper": 1124.971983746455,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-2-70b-chat",
      "display_name": "llama-2-70b-chat",
      "provider": "meta",
      "aliases": [
        "llama-2-70b-chat"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 47.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 47.25,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 993.6857836758153,
          "normalized_0_100": 32.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 316,
          "lower": 980.4287643624073,
          "upper": 1006.9428029892233,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1115.2707015574847,
          "normalized_0_100": 47.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 302,
          "lower": 1109.8041124450529,
          "upper": 1120.7372906699165,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-coder-480b-a35b-instruct",
      "display_name": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-Coder-480B-A35B-Instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 480.2,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 46.92,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 46.92,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "evasion_bench",
          "value": 78.16,
          "normalized_0_100": 78.16,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 46.92,
          "normalized_0_100": 46.92,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 38.7,
          "normalized_0_100": 38.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 23.9,
          "normalized_0_100": 23.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nvidia-nemotron-3-nano-30b-a3b-bf16",
      "display_name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
      "provider": "nvidia",
      "aliases": [
        "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 31.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 46.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 46.9,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 15.5,
          "normalized_0_100": 15.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 78.3,
          "normalized_0_100": 78.3,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 46.9,
          "normalized_0_100": 46.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nous-hermes-2-mixtral-8x7b-dpo",
      "display_name": "nous-hermes-2-mixtral-8x7b-dpo",
      "provider": null,
      "aliases": [
        "nous-hermes-2-mixtral-8x7b-dpo"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 46.87,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 46.87,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1112.0815758567417,
          "normalized_0_100": 46.87,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 303,
          "lower": 1100.2562214620057,
          "upper": 1123.9069302514777,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-2-3b-instruct",
      "display_name": "llama-3.2-3b-instruct",
      "provider": "meta",
      "aliases": [
        "llama-3.2-3b-instruct"
      ],
      "openness": null,
      "license": "Llama 3.2",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 46.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 46.57,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1016.3234575160795,
          "normalized_0_100": 35.48,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 307,
          "lower": 991.9347110543843,
          "upper": 1040.712203977775,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1098.230988125546,
          "normalized_0_100": 45.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 305,
          "lower": 1082.3892939239727,
          "upper": 1114.0726823271193,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1109.577555669796,
          "normalized_0_100": 46.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 305,
          "lower": 1102.017535504077,
          "upper": 1117.1375758355152,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-2-0-pro-exp-02-05",
      "display_name": "gemini-2.0-pro-exp-02-05",
      "provider": null,
      "aliases": [
        "gemini-2.0-pro-exp-02-05"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 46.52,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 46.52,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.371337,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.465241,
          "normalized_0_100": 46.52,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.428342,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "starling-lm-7b-alpha",
      "display_name": "starling-lm-7b-alpha",
      "provider": null,
      "aliases": [
        "starling-lm-7b-alpha"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 46.35,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 46.35,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1044.266072661834,
          "normalized_0_100": 38.81,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 299,
          "lower": 1022.7156076180643,
          "upper": 1065.8165377056037,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1098.023596812986,
          "normalized_0_100": 45.2,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 306,
          "lower": 1081.7540184755537,
          "upper": 1114.293175150418,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1107.7036110786366,
          "normalized_0_100": 46.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 306,
          "lower": 1099.7711490199977,
          "upper": 1115.6360731372754,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "openchat-3-5-0106",
      "display_name": "openchat-3.5-0106",
      "provider": null,
      "aliases": [
        "openchat-3.5-0106"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 46.23,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 46.23,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1082.7788797476894,
          "normalized_0_100": 43.39,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 283,
          "lower": 1065.4182517154095,
          "upper": 1100.1395077799693,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1113.4229789515132,
          "normalized_0_100": 47.03,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 297,
          "lower": 1099.4670551992876,
          "upper": 1127.3789027037385,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1106.6511535239806,
          "normalized_0_100": 46.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 307,
          "lower": 1098.7319103443026,
          "upper": 1114.5703967036588,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o3-mini-2025-01-31-low",
      "display_name": "o3-mini-2025-01-31-low",
      "provider": null,
      "aliases": [
        "o3-mini-2025-01-31-low"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 46.16,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 46.16,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.246203,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.461574,
          "normalized_0_100": 46.16,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.077005,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "vicuna-33b",
      "display_name": "vicuna-33b",
      "provider": null,
      "aliases": [
        "vicuna-33b"
      ],
      "openness": null,
      "license": "Non-commercial",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 46.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 46.06,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1034.9071865368956,
          "normalized_0_100": 37.69,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 301,
          "lower": 1017.7840112734103,
          "upper": 1052.030361800381,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1105.3025669086428,
          "normalized_0_100": 46.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 308,
          "lower": 1099.1678479315813,
          "upper": 1111.4372858857043,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-llm-67b-chat",
      "display_name": "deepseek-llm-67b-chat",
      "provider": "deepseek",
      "aliases": [
        "deepseek-llm-67b-chat"
      ],
      "openness": null,
      "license": "DeepSeek License",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 46.04,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 46.04,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1130.85499758354,
          "normalized_0_100": 49.1,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 274,
          "lower": 1091.2042800221884,
          "upper": 1170.5057151448914,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1096.085597367385,
          "normalized_0_100": 44.97,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 307,
          "lower": 1072.4306563415412,
          "upper": 1119.740538393229,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1105.072013876746,
          "normalized_0_100": 46.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 309,
          "lower": 1093.4971732700753,
          "upper": 1116.646854483417,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-exp-1206",
      "display_name": "gemini-exp-1206",
      "provider": null,
      "aliases": [
        "gemini-exp-1206"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 45.79,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 45.79,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.186898,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.457932,
          "normalized_0_100": 45.79,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.373797,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "snowflake-arctic-instruct",
      "display_name": "snowflake-arctic-instruct",
      "provider": null,
      "aliases": [
        "snowflake-arctic-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 45.56,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 45.56,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1066.1283617277336,
          "normalized_0_100": 41.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 292,
          "lower": 1052.535800736815,
          "upper": 1079.7209227186522,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1101.018089263002,
          "normalized_0_100": 45.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 310,
          "lower": 1095.1278274453416,
          "upper": 1106.9083510806622,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama2-70b-steerlm-chat",
      "display_name": "llama2-70b-steerlm-chat",
      "provider": "nvidia",
      "aliases": [
        "llama2-70b-steerlm-chat"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 45.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 45.21,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1098.1329126453877,
          "normalized_0_100": 45.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 311,
          "lower": 1085.6366623912495,
          "upper": 1110.629162899526,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "openchat-3-5",
      "display_name": "openchat-3.5",
      "provider": null,
      "aliases": [
        "openchat-3.5"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 45.09,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 45.09,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1069.0496266085065,
          "normalized_0_100": 41.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 289,
          "lower": 1037.0990721174035,
          "upper": 1101.0001810996096,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1097.074546341542,
          "normalized_0_100": 45.09,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 312,
          "lower": 1087.4185585090408,
          "upper": 1106.730534174043,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "granite-3-0-8b-instruct",
      "display_name": "granite-3.0-8b-instruct",
      "provider": "ibm",
      "aliases": [
        "granite-3.0-8b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 45.02,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 45.02,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1062.1457797268638,
          "normalized_0_100": 40.93,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 294,
          "lower": 1038.891305957793,
          "upper": 1085.4002534959345,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1113.0050136928173,
          "normalized_0_100": 46.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 298,
          "lower": 1095.131711033687,
          "upper": 1130.8783163519477,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1096.4803415597212,
          "normalized_0_100": 45.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 313,
          "lower": 1087.9280903797435,
          "upper": 1105.0325927396984,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-small-2402",
      "display_name": "mistral-small-2402",
      "provider": null,
      "aliases": [
        "mistral-small-2402"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 44.77,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 44.77,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.45098,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.447712,
          "normalized_0_100": 44.77,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-3-1-405b-instruct-turbo",
      "display_name": "meta-llama-3.1-405b-instruct-turbo",
      "provider": null,
      "aliases": [
        "Meta-Llama-3.1-405B-Instruct-Turbo",
        "meta-llama-3.1-405b-instruct-turbo"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 44.37,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 44.37,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.421193,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.345238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.44371,
          "normalized_0_100": 44.37,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.448413,
          "normalized_0_100": 44.84,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.368656,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.190476,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-medium-128k-instruct",
      "display_name": "Phi-3-medium-128k-instruct",
      "provider": null,
      "aliases": [
        "Phi-3-medium-128k-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 44.22,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 44.22,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.547619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.442177,
          "normalized_0_100": 44.22,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nvidia-nemotron-3-nano-30b-a3b-fp8",
      "display_name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
      "provider": "nvidia",
      "aliases": [
        "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 31.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 44.15,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 44.15,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 10.2,
          "normalized_0_100": 10.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 78.1,
          "normalized_0_100": 78.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 44.15,
          "normalized_0_100": 44.15,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-2-13b-chat",
      "display_name": "llama-2-13b-chat",
      "provider": "meta",
      "aliases": [
        "llama-2-13b-chat"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 43.59,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 43.59,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1000.0644443825589,
          "normalized_0_100": 33.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 315,
          "lower": 981.3220909825611,
          "upper": 1018.8067977825567,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1084.4993644299298,
          "normalized_0_100": 43.59,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 318,
          "lower": 1077.8262484460615,
          "upper": 1091.1724804137982,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "solar-10-7b-instruct-v1-0",
      "display_name": "solar-10.7b-instruct-v1.0",
      "provider": null,
      "aliases": [
        "solar-10.7b-instruct-v1.0"
      ],
      "openness": null,
      "license": "CC-BY-NC-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 43.43,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 43.43,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1083.1294221854723,
          "normalized_0_100": 43.43,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 320,
          "lower": 1070.0207552705501,
          "upper": 1096.2380891003943,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "dolphin-2-2-1-mistral-7b",
      "display_name": "dolphin-2.2.1-mistral-7b",
      "provider": null,
      "aliases": [
        "dolphin-2.2.1-mistral-7b"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 43.18,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 43.18,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1081.0150222923028,
          "normalized_0_100": 43.18,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 321,
          "lower": 1065.6388342427713,
          "upper": 1096.391210341834,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-mini-4k-instruct-june-2024",
      "display_name": "phi-3-mini-4k-instruct-june-2024",
      "provider": "microsoft",
      "aliases": [
        "phi-3-mini-4k-instruct-june-2024"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 43.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 43.06,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1032.459567065404,
          "normalized_0_100": 37.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 303,
          "lower": 1014.718353194068,
          "upper": 1050.2007809367399,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_coding_rating",
          "value": 1093.6023252642558,
          "normalized_0_100": 44.67,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 308,
          "lower": 1079.843556949089,
          "upper": 1107.3610935794222,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1079.9984411296834,
          "normalized_0_100": 43.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 322,
          "lower": 1073.6499325121888,
          "upper": 1086.3469497471779,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "granite-3-0-2b-instruct",
      "display_name": "granite-3.0-2b-instruct",
      "provider": "ibm",
      "aliases": [
        "granite-3.0-2b-instruct"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 43.04,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 43.04,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1069.0448722173787,
          "normalized_0_100": 41.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 290,
          "lower": 1046.2648927311848,
          "upper": 1091.8248517035727,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1079.8837223484472,
          "normalized_0_100": 43.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 323,
          "lower": 1071.663541631918,
          "upper": 1088.1039030649763,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "wizardlm-13b",
      "display_name": "wizardlm-13b",
      "provider": "microsoft",
      "aliases": [
        "wizardlm-13b"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 42.65,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 42.65,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1021.7895521876193,
          "normalized_0_100": 36.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 305,
          "lower": 988.8868936217766,
          "upper": 1054.692210753462,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1076.5795742482037,
          "normalized_0_100": 42.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 324,
          "lower": 1067.3046735645396,
          "upper": 1085.854474931868,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "swe-lego-swe-lego-qwen3-8b",
      "display_name": "SWE-Lego/SWE-Lego-Qwen3-8B",
      "provider": "SWE-Lego",
      "aliases": [
        "SWE-Lego/SWE-Lego-Qwen3-8B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 42.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 42.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 42.2,
          "normalized_0_100": 42.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 42.2,
          "normalized_0_100": 42.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "dracarys2-72b-instruct",
      "display_name": "dracarys2-72b-instruct",
      "provider": null,
      "aliases": [
        "Dracarys2-72B-Instruct",
        "dracarys2-72b-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 42.12,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 42.12,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.281818,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.421212,
          "normalized_0_100": 42.12,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.4,
          "normalized_0_100": 40,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.127273,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mpt-30b-chat",
      "display_name": "mpt-30b-chat",
      "provider": null,
      "aliases": [
        "mpt-30b-chat"
      ],
      "openness": null,
      "license": "CC-BY-NC-SA-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 41.71,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 41.71,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1068.650545712046,
          "normalized_0_100": 41.71,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 327,
          "lower": 1056.4760165872322,
          "upper": 1080.82507483686,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "tiiuae-falcon-h1r-7b",
      "display_name": "tiiuae/Falcon-H1R-7B",
      "provider": "tiiuae",
      "aliases": [
        "tiiuae/Falcon-H1R-7B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 41.6,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 41.6,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 11.1,
          "normalized_0_100": 11.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 72.1,
          "normalized_0_100": 72.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 41.6,
          "normalized_0_100": 41.6,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "codellama-34b-instruct",
      "display_name": "codellama-34b-instruct",
      "provider": "meta",
      "aliases": [
        "codellama-34b-instruct"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 41.33,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 41.33,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 975.4997765880569,
          "normalized_0_100": 30.63,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 319,
          "lower": 937.6426124528141,
          "upper": 1013.3569407232999,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1065.5253531798335,
          "normalized_0_100": 41.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 328,
          "lower": 1056.7305705333138,
          "upper": 1074.3201358263532,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-3-8b-instruct",
      "display_name": "Meta-Llama-3-8B-Instruct",
      "provider": null,
      "aliases": [
        "Meta-Llama-3-8B-Instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 41.17,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 41.17,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.6875,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.411706,
          "normalized_0_100": 41.17,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.6875,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "learnlm-1-5-pro-experimental",
      "display_name": "learnlm-1.5-pro-experimental",
      "provider": null,
      "aliases": [
        "learnlm-1.5-pro-experimental"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 40.96,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 40.96,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.388759,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.409641,
          "normalized_0_100": 40.96,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.314598,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "grok-2-1212",
      "display_name": "grok-2-1212",
      "provider": null,
      "aliases": [
        "grok-2-1212"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 40.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 40.85,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.423262,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.408531,
          "normalized_0_100": 40.85,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.346524,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hermes-3-llama-3-1-70b",
      "display_name": "hermes-3-llama-3.1-70b",
      "provider": null,
      "aliases": [
        "hermes-3-llama-3.1-70b"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 40.76,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 40.76,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.463235,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.407563,
          "normalized_0_100": 40.76,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-3-70b-instruct-turbo",
      "display_name": "llama-3.3-70b-instruct-turbo",
      "provider": null,
      "aliases": [
        "llama-3.3-70b-instruct-turbo"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 40.49,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 40.49,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.381098,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.404853,
          "normalized_0_100": 40.49,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.301829,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "vicuna-13b",
      "display_name": "vicuna-13b",
      "provider": null,
      "aliases": [
        "vicuna-13b"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 40.4,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1038.5662265583094,
          "normalized_0_100": 38.13,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 300,
          "lower": 1019.2451202541549,
          "upper": 1057.8873328624636,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1057.6993305003573,
          "normalized_0_100": 40.4,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 330,
          "lower": 1051.0863802734057,
          "upper": 1064.3122807273091,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "codellama-70b-instruct",
      "display_name": "codellama-70b-instruct",
      "provider": "meta",
      "aliases": [
        "codellama-70b-instruct"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.35,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 40.35,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1057.283500695992,
          "normalized_0_100": 40.35,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 331,
          "lower": 1039.1981264940368,
          "upper": 1075.368874897947,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-7b-it",
      "display_name": "gemma-7b-it",
      "provider": "google",
      "aliases": [
        "gemma-7b-it"
      ],
      "openness": null,
      "license": "Gemma license",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.23,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 40.23,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1034.4986888226967,
          "normalized_0_100": 37.65,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 302,
          "lower": 1014.8362550947552,
          "upper": 1054.1611225506385,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1056.1963414914187,
          "normalized_0_100": 40.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 332,
          "lower": 1046.6380256242821,
          "upper": 1065.7546573585553,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "lcb-math-qwen2-72b-instructv3-merged-50",
      "display_name": "lcb-math-qwen2-72b-instructv3-merged-50",
      "provider": null,
      "aliases": [
        "lcb-math-qwen2-72b-instructv3-merged-50"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 40.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 40.2,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.137255,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.401961,
          "normalized_0_100": 40.2,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.411765,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-small-2501",
      "display_name": "mistral-small-2501",
      "provider": null,
      "aliases": [
        "mistral-small-2501"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 40.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 40.06,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.360901,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.400643,
          "normalized_0_100": 40.06,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.268168,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-2-1b-instruct",
      "display_name": "llama-3.2-1b-instruct",
      "provider": "meta",
      "aliases": [
        "llama-3.2-1b-instruct"
      ],
      "openness": null,
      "license": "Llama 3.2",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.04,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 40.04,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 957.7139705303814,
          "normalized_0_100": 28.51,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 322,
          "lower": 931.4091084319534,
          "upper": 984.0188326288094,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1054.6778884319897,
          "normalized_0_100": 40.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 333,
          "lower": 1046.9693510184686,
          "upper": 1062.386425845511,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "falcon-180b-chat",
      "display_name": "falcon-180b-chat",
      "provider": null,
      "aliases": [
        "falcon-180b-chat"
      ],
      "openness": null,
      "license": "Falcon-180B TII License",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 40.01,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 40.01,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1054.394801503922,
          "normalized_0_100": 40.01,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 334,
          "lower": 1037.2979679865412,
          "upper": 1071.491635021303,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "guanaco-33b",
      "display_name": "guanaco-33b",
      "provider": null,
      "aliases": [
        "guanaco-33b"
      ],
      "openness": null,
      "license": "Non-commercial",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 39.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 39.9,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1053.419750267534,
          "normalized_0_100": 39.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 336,
          "lower": 1041.3421820535746,
          "upper": 1065.4973184814933,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-2-7b-chat",
      "display_name": "llama-2-7b-chat",
      "provider": "meta",
      "aliases": [
        "llama-2-7b-chat"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 39.9,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 39.9,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 972.0090480910893,
          "normalized_0_100": 30.21,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 320,
          "lower": 952.3215469785298,
          "upper": 991.6965492036488,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1053.4398811015672,
          "normalized_0_100": 39.9,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 335,
          "lower": 1046.455667431179,
          "upper": 1060.424094771955,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-14b-chat",
      "display_name": "qwen-14b-chat",
      "provider": "alibaba",
      "aliases": [
        "qwen-14b-chat"
      ],
      "openness": null,
      "license": "Qianwen LICENSE",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 39.62,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 39.62,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1075.9418776425869,
          "normalized_0_100": 42.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 286,
          "lower": 1032.2413395259846,
          "upper": 1119.6424157591896,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1051.0685704744587,
          "normalized_0_100": 39.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 337,
          "lower": 1040.1632369332046,
          "upper": 1061.9739040157128,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "miromind-ai-mirothinker-v1-5-235b",
      "display_name": "miromind-ai/MiroThinker-v1.5-235B",
      "provider": "miromind-ai",
      "aliases": [
        "miromind-ai/MiroThinker-v1.5-235B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 235.1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 39.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 39.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 39.2,
          "normalized_0_100": 39.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 39.2,
          "normalized_0_100": 39.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "liquidai-lfm2-5-1-2b-instruct",
      "display_name": "LiquidAI/LFM2.5-1.2B-Instruct",
      "provider": "LiquidAI",
      "aliases": [
        "LiquidAI/LFM2.5-1.2B-Instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1.2,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 38.89,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 38.89,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 38.89,
          "normalized_0_100": 38.89,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 38.89,
          "normalized_0_100": 38.89,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "smollm2-1-7b-instruct",
      "display_name": "smollm2-1.7b-instruct",
      "provider": null,
      "aliases": [
        "smollm2-1.7b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 38.55,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 38.55,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1042.069668555414,
          "normalized_0_100": 38.55,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 339,
          "lower": 1027.927902190381,
          "upper": 1056.2114349204467,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "stripedhyena-nous-7b",
      "display_name": "stripedhyena-nous-7b",
      "provider": null,
      "aliases": [
        "stripedhyena-nous-7b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 38.12,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 38.12,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1038.480873571044,
          "normalized_0_100": 38.12,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 340,
          "lower": 1027.4999231128736,
          "upper": 1049.4618240292145,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-7b-instruct",
      "display_name": "Qwen2-7B-Instruct",
      "provider": null,
      "aliases": [
        "Qwen2-7B-Instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 37.5,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 37.5,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.375,
          "normalized_0_100": 37.5,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "olmo-7b-instruct",
      "display_name": "olmo-7b-instruct",
      "provider": "allenai",
      "aliases": [
        "olmo-7b-instruct"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 37.33,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 37.33,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1012.6428844311565,
          "normalized_0_100": 35.05,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 310,
          "lower": 989.410313902556,
          "upper": 1035.875454959757,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1031.8520977884943,
          "normalized_0_100": 37.33,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 341,
          "lower": 1020.7724222946943,
          "upper": 1042.9317732822944,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "vicuna-7b",
      "display_name": "vicuna-7b",
      "provider": null,
      "aliases": [
        "vicuna-7b"
      ],
      "openness": null,
      "license": "Llama 2 Community",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 37.23,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 37.23,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 978.0776757914041,
          "normalized_0_100": 30.94,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 318,
          "lower": 945.0274014921936,
          "upper": 1011.1279500906144,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1030.9875024693274,
          "normalized_0_100": 37.23,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 342,
          "lower": 1021.8555171579395,
          "upper": 1040.1194877807152,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nemotron-orchestrator-8b",
      "display_name": "nvidia/Nemotron-Orchestrator-8B",
      "provider": "nvidia",
      "aliases": [
        "nvidia/Nemotron-Orchestrator-8B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 37.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 37.1,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 37.1,
          "normalized_0_100": 37.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 37.1,
          "normalized_0_100": 37.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-small-2503",
      "display_name": "mistral-small-2503",
      "provider": null,
      "aliases": [
        "mistral-small-2503"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 36.91,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 36.91,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.316707,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.369076,
          "normalized_0_100": 36.91,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.194512,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "palm-2",
      "display_name": "palm-2",
      "provider": "google",
      "aliases": [
        "palm-2"
      ],
      "openness": null,
      "license": "Proprietary",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 36.79,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 36.79,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 886.3035897812342,
          "normalized_0_100": 20.02,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 326,
          "lower": 849.1071779824833,
          "upper": 923.5000015799852,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1027.278483791331,
          "normalized_0_100": 36.79,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 343,
          "lower": 1018.0221475007713,
          "upper": 1036.5348200818905,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "step-2-16k-202411",
      "display_name": "step-2-16k-202411",
      "provider": null,
      "aliases": [
        "step-2-16k-202411"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 36.51,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 36.51,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.203161,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.365073,
          "normalized_0_100": 36.51,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.255268,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-7b-instruct",
      "display_name": "mistral-7b-instruct",
      "provider": "mistral",
      "aliases": [
        "mistral-7b-instruct"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 36.34,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 36.34,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 930.5165513141067,
          "normalized_0_100": 25.28,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 324,
          "lower": 897.4857889188613,
          "upper": 963.5473137093519,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1023.5485628292377,
          "normalized_0_100": 36.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 344,
          "lower": 1014.351797462008,
          "upper": 1032.7453281964672,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mathstral-7b-v0-1",
      "display_name": "mathstral-7B-v0.1",
      "provider": null,
      "aliases": [
        "mathstral-7B-v0.1"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 36.27,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 36.27,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.392157,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.362745,
          "normalized_0_100": 36.27,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.176471,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "hunyuan-turbos-20250313",
      "display_name": "hunyuan-turbos-20250313",
      "provider": null,
      "aliases": [
        "hunyuan-turbos-20250313"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 36.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 36.21,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.312688,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.362097,
          "normalized_0_100": 36.21,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.187813,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-1-1-2b-it",
      "display_name": "gemma-1.1-2b-it",
      "provider": "google",
      "aliases": [
        "gemma-1.1-2b-it"
      ],
      "openness": null,
      "license": "Gemma license",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 36.19,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 36.19,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1010.5796259415224,
          "normalized_0_100": 34.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 313,
          "lower": 992.7339206289951,
          "upper": 1028.4253312540498,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1022.2651740748628,
          "normalized_0_100": 36.19,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 345,
          "lower": 1014.5807371390003,
          "upper": 1029.9496110107252,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "wbot-4-347b-no-s",
      "display_name": "wbot-4:347b_no_s",
      "provider": null,
      "aliases": [
        "wbot-4:347b_no_s"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 35.29,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 35.29,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.058824,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": 35.29,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.058824,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "azerogpt",
      "display_name": "azerogpt",
      "provider": null,
      "aliases": [
        "azerogpt"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 34.83,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 34.83,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.309596,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.348341,
          "normalized_0_100": 34.83,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.119192,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "openai-gpt-oss-120b",
      "display_name": "openai/gpt-oss-120b",
      "provider": "openai",
      "aliases": [
        "openai/gpt-oss-120b"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 120.4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 34.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 34.1,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 67.1,
          "normalized_0_100": 67.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 5.2,
          "normalized_0_100": 5.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 34.1,
          "normalized_0_100": 34.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 16.2,
          "normalized_0_100": 16.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 47.9,
          "normalized_0_100": 47.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemma-2b-it",
      "display_name": "gemma-2b-it",
      "provider": "google",
      "aliases": [
        "gemma-2b-it"
      ],
      "openness": null,
      "license": "Gemma license",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 33.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 33.8,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 984.9402274627237,
          "normalized_0_100": 31.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 317,
          "lower": 959.6408838762328,
          "upper": 1010.2395710492145,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 1002.1880263549622,
          "normalized_0_100": 33.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 346,
          "lower": 990.6650690244767,
          "upper": 1013.7109836854477,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt-4o-2024-11-20",
      "display_name": "gpt-4o-2024-11-20",
      "provider": null,
      "aliases": [
        "gpt-4o-2024-11-20"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 33.44,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 33.44,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.583333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.368556,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.334383,
          "normalized_0_100": 33.44,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.42139,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "openai-gpt-oss-20b",
      "display_name": "openai/gpt-oss-20b",
      "provider": "openai",
      "aliases": [
        "openai/gpt-oss-20b"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 21.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 32.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 32.8,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 56.8,
          "normalized_0_100": 56.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 4.2,
          "normalized_0_100": 4.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 32.8,
          "normalized_0_100": 32.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_verified",
          "value": 37.4,
          "normalized_0_100": 37.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "koala-13b",
      "display_name": "koala-13b",
      "provider": null,
      "aliases": [
        "koala-13b"
      ],
      "openness": null,
      "license": "Non-commercial",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 32.31,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 32.31,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 879.9632049632082,
          "normalized_0_100": 19.27,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 327,
          "lower": 848.6687878196238,
          "upper": 911.2576221067927,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 989.6489118295913,
          "normalized_0_100": 32.31,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 348,
          "lower": 979.8603260135121,
          "upper": 999.4374976456705,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "open-mixtral-8x22b",
      "display_name": "open-mixtral-8x22b",
      "provider": null,
      "aliases": [
        "open-mixtral-8x22b"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 31.93,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 31.93,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.092344,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.319258,
          "normalized_0_100": 31.93,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.138517,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "miromind-ai-mirothinker-v1-5-30b",
      "display_name": "miromind-ai/MiroThinker-v1.5-30B",
      "provider": "miromind-ai",
      "aliases": [
        "miromind-ai/MiroThinker-v1.5-30B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 30.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 31,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 31,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 31,
          "normalized_0_100": 31,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 31,
          "normalized_0_100": 31,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "vicuna-7b-v1-5",
      "display_name": "vicuna-7b-v1.5",
      "provider": null,
      "aliases": [
        "vicuna-7b-v1.5"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 30.88,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 30.88,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.117647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.308824,
          "normalized_0_100": 30.88,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.235294,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "o1-mini-2024-09-12",
      "display_name": "o1-mini-2024-09-12",
      "provider": null,
      "aliases": [
        "o1-mini-2024-09-12"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 30.52,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 30.52,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.235547,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.30518,
          "normalized_0_100": 30.52,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.137762,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "chatglm3-6b",
      "display_name": "chatglm3-6b",
      "provider": null,
      "aliases": [
        "chatglm3-6b"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 30.22,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 30.22,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1063.194960457497,
          "normalized_0_100": 41.06,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 293,
          "lower": 1020.1451058314772,
          "upper": 1106.244815083517,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 972.0784071594135,
          "normalized_0_100": 30.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 349,
          "lower": 960.476119568621,
          "upper": 983.6806947502059,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-7b-instruct-v0-3",
      "display_name": "Mistral-7B-Instruct-v0.3",
      "provider": null,
      "aliases": [
        "Mistral-7B-Instruct-v0.3"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 29.93,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 29.93,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.29932,
          "normalized_0_100": 29.93,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-3-1-70b-instruct-turbo",
      "display_name": "meta-llama-3.1-70b-instruct-turbo",
      "provider": null,
      "aliases": [
        "Meta-Llama-3.1-70B-Instruct-Turbo",
        "meta-llama-3.1-70b-instruct-turbo"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 29.15,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 29.15,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.341396,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.642857,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.291473,
          "normalized_0_100": 29.15,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.714286,
          "normalized_0_100": 71.43,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.666667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.23566,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.285714,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-2-7b-chat-hf",
      "display_name": "Llama-2-7b-chat-hf",
      "provider": null,
      "aliases": [
        "Llama-2-7b-chat-hf"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 28.81,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 28.81,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.625,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.288095,
          "normalized_0_100": 28.81,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.190476,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.625,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-small-128k-instruct",
      "display_name": "Phi-3-small-128k-instruct",
      "provider": null,
      "aliases": [
        "Phi-3-small-128k-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 28.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 28.57,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.285714,
          "normalized_0_100": 28.57,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gpt4all-13b-snoozy",
      "display_name": "gpt4all-13b-snoozy",
      "provider": null,
      "aliases": [
        "gpt4all-13b-snoozy"
      ],
      "openness": null,
      "license": "Non-commercial",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 28.34,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 28.34,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 956.2210529097321,
          "normalized_0_100": 28.34,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 350,
          "lower": 941.099891034612,
          "upper": 971.3422147848521,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mpt-7b-chat",
      "display_name": "mpt-7b-chat",
      "provider": null,
      "aliases": [
        "mpt-7b-chat"
      ],
      "openness": null,
      "license": "CC-BY-NC-SA-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 28.22,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 28.22,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 953.7650495502411,
          "normalized_0_100": 28.04,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 323,
          "lower": 912.0124942053596,
          "upper": 995.5176048951224,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 955.258702546279,
          "normalized_0_100": 28.22,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 351,
          "lower": 943.4455341431925,
          "upper": 967.0718709493653,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "vicuna-7b-v1-5-16k",
      "display_name": "vicuna-7b-v1.5-16k",
      "provider": null,
      "aliases": [
        "vicuna-7b-v1.5-16k"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 27.94,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 27.94,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.058824,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.279412,
          "normalized_0_100": 27.94,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.117647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "moonshotai-kimi-k2-instruct",
      "display_name": "moonshotai/Kimi-K2-Instruct",
      "provider": "moonshotai",
      "aliases": [
        "moonshotai/Kimi-K2-Instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 1026.5,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 27.73,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 27.73,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 27.73,
          "normalized_0_100": 27.73,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 27.67,
          "normalized_0_100": 27.67,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 27.8,
          "normalized_0_100": 27.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "rwkv-4-raven-14b",
      "display_name": "RWKV-4-Raven-14B",
      "provider": null,
      "aliases": [
        "RWKV-4-Raven-14B"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 27.41,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 27.41,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 909.6256630042981,
          "normalized_0_100": 22.8,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 325,
          "lower": 871.1022376994279,
          "upper": 948.1490883091683,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 948.4255783824063,
          "normalized_0_100": 27.41,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 352,
          "lower": 937.088197409086,
          "upper": 959.7629593557267,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nemotron-terminal-32b",
      "display_name": "nvidia/Nemotron-Terminal-32B",
      "provider": "nvidia",
      "aliases": [
        "nvidia/Nemotron-Terminal-32B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 32.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 27.4,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 27.4,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 27.4,
          "normalized_0_100": 27.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 27.4,
          "normalized_0_100": 27.4,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "open-mistral-nemo",
      "display_name": "open-mistral-nemo",
      "provider": null,
      "aliases": [
        "open-mistral-nemo"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 27.06,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 27.06,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.117647,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.270588,
          "normalized_0_100": 27.06,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.352941,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "gemini-1-5-flash-exp-0827",
      "display_name": "gemini-1.5-flash-exp-0827",
      "provider": null,
      "aliases": [
        "gemini-1.5-flash-exp-0827"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 26.72,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 26.72,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.7,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.156863,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.267227,
          "normalized_0_100": 26.72,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.4,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.470588,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "chatglm2-6b",
      "display_name": "chatglm2-6b",
      "provider": null,
      "aliases": [
        "chatglm2-6b"
      ],
      "openness": null,
      "license": "Apache-2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 26.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 26.25,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 938.667241463698,
          "normalized_0_100": 26.25,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 353,
          "lower": 925.151726381844,
          "upper": 952.1827565455519,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-small-2409",
      "display_name": "mistral-small-2409",
      "provider": null,
      "aliases": [
        "mistral-small-2409"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 25.82,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 25.82,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.328089,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.258159,
          "normalized_0_100": 25.82,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.166667,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.213482,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "alpaca-13b",
      "display_name": "alpaca-13b",
      "provider": null,
      "aliases": [
        "alpaca-13b"
      ],
      "openness": null,
      "license": "Non-commercial",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 25.56,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 25.56,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 780.9619978556512,
          "normalized_0_100": 7.5,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 330,
          "lower": 742.3245407643443,
          "upper": 819.5994549469582,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 932.8501780639943,
          "normalized_0_100": 25.56,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 354,
          "lower": 921.4931280820641,
          "upper": 944.2072280459247,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-1-8b-chat",
      "display_name": "Qwen1.5-1.8B-Chat",
      "provider": null,
      "aliases": [
        "Qwen1.5-1.8B-Chat"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 25,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 25,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.25,
          "normalized_0_100": 25,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "yi-6b-chat",
      "display_name": "Yi-6B-Chat",
      "provider": null,
      "aliases": [
        "Yi-6B-Chat"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 25,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 25,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.25,
          "normalized_0_100": 25,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-3-1-8b-instruct-turbo",
      "display_name": "meta-llama-3.1-8b-instruct-turbo",
      "provider": null,
      "aliases": [
        "Meta-Llama-3.1-8B-Instruct-Turbo",
        "meta-llama-3.1-8b-instruct-turbo"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 24.78,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 24.78,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.280259,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.247804,
          "normalized_0_100": 24.78,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.224206,
          "normalized_0_100": 22.42,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.133765,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.095238,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "chatglm-6b",
      "display_name": "chatglm-6b",
      "provider": null,
      "aliases": [
        "chatglm-6b"
      ],
      "openness": null,
      "license": "Non-commercial",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 23.85,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 23.85,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 1076.3692847098553,
          "normalized_0_100": 42.62,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 285,
          "lower": 1037.515362194088,
          "upper": 1115.2232072256224,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 918.5040098118809,
          "normalized_0_100": 23.85,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 355,
          "lower": 906.1782049238601,
          "upper": 930.8298146999018,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "oasst-pythia-12b",
      "display_name": "oasst-pythia-12b",
      "provider": null,
      "aliases": [
        "oasst-pythia-12b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 23.57,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 23.57,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 802.011309662715,
          "normalized_0_100": 10,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 329,
          "lower": 768.9525630623788,
          "upper": 835.0700562630514,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 916.1059328802198,
          "normalized_0_100": 23.57,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 356,
          "lower": 905.3244215624069,
          "upper": 926.8874441980328,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "xiaomimimo-mimo-v2-flash",
      "display_name": "XiaomiMiMo/MiMo-V2-Flash",
      "provider": "XiaomiMiMo",
      "aliases": [
        "XiaomiMiMo/MiMo-V2-Flash"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 309.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 22.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 22.1,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 22.1,
          "normalized_0_100": 22.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 22.1,
          "normalized_0_100": 22.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-coder-v2-lite-instruct",
      "display_name": "DeepSeek-Coder-V2-Lite-Instruct",
      "provider": null,
      "aliases": [
        "DeepSeek-Coder-V2-Lite-Instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 21.63,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 21.63,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.02381,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.21627,
          "normalized_0_100": 21.63,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-r1-distill-llama-70b",
      "display_name": "deepseek-r1-distill-llama-70b",
      "provider": null,
      "aliases": [
        "deepseek-r1-distill-llama-70b"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 21.52,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 21.52,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.272727,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.215152,
          "normalized_0_100": 21.52,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.090909,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-235b-a22b",
      "display_name": "Qwen/Qwen3-235B-A22B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3-235B-A22B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 235.1,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 21.41,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 21.41,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 21.41,
          "normalized_0_100": 21.41,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 21.41,
          "normalized_0_100": 21.41,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "fastchat-t5-3b",
      "display_name": "fastchat-t5-3b",
      "provider": null,
      "aliases": [
        "fastchat-t5-3b"
      ],
      "openness": null,
      "license": "Apache 2.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 20.99,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 20.99,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 717.9228393627601,
          "normalized_0_100": 0,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 331,
          "lower": 679.914467480065,
          "upper": 755.9312112454553,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 894.4384812305873,
          "normalized_0_100": 20.99,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 357,
          "lower": 882.1276052397834,
          "upper": 906.7493572213912,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-1-5b-instruct",
      "display_name": "Qwen2-1.5B-Instruct",
      "provider": null,
      "aliases": [
        "Qwen2-1.5B-Instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 20.83,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 20.83,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.208333,
          "normalized_0_100": 20.83,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen-qwen3-5-0-8b",
      "display_name": "Qwen/Qwen3.5-0.8B",
      "provider": "Qwen",
      "aliases": [
        "Qwen/Qwen3.5-0.8B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 0.9,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 20.8,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 20.8,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "gpqa",
          "value": 11.9,
          "normalized_0_100": 11.9,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "mmlu_pro",
          "value": 29.7,
          "normalized_0_100": 29.7,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 20.8,
          "normalized_0_100": 20.8,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nemotron-terminal-14b",
      "display_name": "nvidia/Nemotron-Terminal-14B",
      "provider": "nvidia",
      "aliases": [
        "nvidia/Nemotron-Terminal-14B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 14.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 20.2,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 20.2,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 20.2,
          "normalized_0_100": 20.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 20.2,
          "normalized_0_100": 20.2,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-3-1-nemotron-70b-instruct-hf",
      "display_name": "Llama-3.1-Nemotron-70B-Instruct-HF",
      "provider": null,
      "aliases": [
        "Llama-3.1-Nemotron-70B-Instruct-HF"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 20,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 20,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.2,
          "normalized_0_100": 20,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.2,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-5-mini-instruct",
      "display_name": "Phi-3.5-mini-instruct",
      "provider": null,
      "aliases": [
        "Phi-3.5-mini-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 20,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 20,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.2,
          "normalized_0_100": 20,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "phi-3-5-moe-instruct",
      "display_name": "Phi-3.5-MoE-instruct",
      "provider": null,
      "aliases": [
        "Phi-3.5-MoE-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 20,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 20,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.2,
          "normalized_0_100": 20,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "openbmb-agentcpm-explore",
      "display_name": "openbmb/AgentCPM-Explore",
      "provider": "openbmb",
      "aliases": [
        "openbmb/AgentCPM-Explore"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 19.1,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 19.1,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 19.1,
          "normalized_0_100": 19.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 19.1,
          "normalized_0_100": 19.1,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "stablelm-tuned-alpha-7b",
      "display_name": "stablelm-tuned-alpha-7b",
      "provider": null,
      "aliases": [
        "stablelm-tuned-alpha-7b"
      ],
      "openness": null,
      "license": "CC-BY-NC-SA-4.0",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 17.68,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 17.68,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 866.6125392789676,
          "normalized_0_100": 17.68,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 358,
          "lower": 853.9162552343629,
          "upper": 879.3088233235723,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "zai-org-glm-4-6",
      "display_name": "zai-org/GLM-4.6",
      "provider": "zai-org",
      "aliases": [
        "zai-org/GLM-4.6"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 356.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 17.09,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 17.09,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 17.09,
          "normalized_0_100": 17.09,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 9.67,
          "normalized_0_100": 9.67,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 24.5,
          "normalized_0_100": 24.5,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "sky-t1-32b-preview",
      "display_name": "sky-t1-32b-preview",
      "provider": null,
      "aliases": [
        "sky-t1-32b-preview"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 16.99,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 16.99,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.176471,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.169935,
          "normalized_0_100": 16.99,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.176471,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "dolly-v2-12b",
      "display_name": "dolly-v2-12b",
      "provider": null,
      "aliases": [
        "dolly-v2-12b"
      ],
      "openness": null,
      "license": "MIT",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 15.84,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 15.84,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_chinese_rating",
          "value": 835.4467641125702,
          "normalized_0_100": 13.98,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 328,
          "lower": 790.4002547940053,
          "upper": 880.4932734311352,
          "category": "chinese",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 851.166861963198,
          "normalized_0_100": 15.84,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 359,
          "lower": 837.7890617025461,
          "upper": 864.5446622238501,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "llama-13b",
      "display_name": "llama-13b",
      "provider": "meta",
      "aliases": [
        "llama-13b"
      ],
      "openness": null,
      "license": "Non-commercial",
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "lmarena"
      ],
      "composite": {
        "score_0_100": 13.75,
        "confidence": "low",
        "components": [
          {
            "source_id": "lmarena",
            "metric": "lmarena_text_overall_rating",
            "normalized_0_100": 13.75,
            "weight": 0.35
          }
        ],
        "derived_from": [
          "lmarena:lmarena_text_overall_rating"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "lmarena",
          "metric": "lmarena_text_overall_rating",
          "value": 833.549647154095,
          "normalized_0_100": 13.75,
          "scale": "bradley-terry-rating",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset",
          "source_dataset": "lmarena-ai/leaderboard-dataset",
          "source_config": "text",
          "source_split": "latest",
          "source_row_count": 8890,
          "rank": 360,
          "lower": 817.8221589084615,
          "upper": 849.2771353997285,
          "category": "overall",
          "captured_at": "2026-05-28T11:57:41.408Z",
          "evidence_note": "External human-preference prior from LMArena; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "nvidia-nemotron-terminal-8b",
      "display_name": "nvidia/Nemotron-Terminal-8B",
      "provider": "nvidia",
      "aliases": [
        "nvidia/Nemotron-Terminal-8B"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 8.2,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 13,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 13,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 13,
          "normalized_0_100": 13,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "terminal_bench",
          "value": 13,
          "normalized_0_100": 13,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "olmo-2-1124-13b-instruct",
      "display_name": "olmo-2-1124-13b-instruct",
      "provider": null,
      "aliases": [
        "olmo-2-1124-13b-instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 12.75,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 12.75,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.088235,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.127451,
          "normalized_0_100": 12.75,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.333333,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.176471,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "google-gemma-3-27b-it",
      "display_name": "google/gemma-3-27b-it",
      "provider": "google",
      "aliases": [
        "google/gemma-3-27b-it"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 27.4,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 11.38,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 11.38,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 11.38,
          "normalized_0_100": 11.38,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 11.38,
          "normalized_0_100": 11.38,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-llama-3-1-405b-instruct",
      "display_name": "meta-llama/Llama-3.1-405B-Instruct",
      "provider": "meta-llama",
      "aliases": [
        "meta-llama/Llama-3.1-405B-Instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 405.9,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 11.18,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 11.18,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 11.18,
          "normalized_0_100": 11.18,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 11.18,
          "normalized_0_100": 11.18,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "helpingai-dhanishtha-2-0-0126",
      "display_name": "HelpingAI/Dhanishtha-2.0-0126",
      "provider": "HelpingAI",
      "aliases": [
        "HelpingAI/Dhanishtha-2.0-0126"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 14.8,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 9.92,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 9.92,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "hle",
          "value": 9.92,
          "normalized_0_100": 9.92,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 9.92,
          "normalized_0_100": 9.92,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-5-72b-instruct-turbo",
      "display_name": "qwen2.5-72b-instruct-turbo",
      "provider": null,
      "aliases": [
        "Qwen2.5-72B-Instruct-Turbo",
        "qwen2.5-72b-instruct-turbo"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 8.36,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 8.36,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.083573,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.083573,
          "normalized_0_100": 8.36,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.86,
          "normalized_0_100": 86,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.125359,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen1-5-0-5b-chat",
      "display_name": "Qwen1.5-0.5B-Chat",
      "provider": null,
      "aliases": [
        "Qwen1.5-0.5B-Chat"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 6.25,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 6.25,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.0625,
          "normalized_0_100": 6.25,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "meta-llama-llama-4-maverick-17b-128e-instruct",
      "display_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
      "provider": "meta-llama",
      "aliases": [
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
      ],
      "openness": "open",
      "license": "Unknown",
      "parameter_billions": 401.6,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "openevals"
      ],
      "composite": {
        "score_0_100": 5.24,
        "confidence": "low",
        "components": [
          {
            "source_id": "openevals",
            "metric": "openevals_aggregate",
            "normalized_0_100": 5.24,
            "weight": 0.45
          }
        ],
        "derived_from": [
          "openevals:openevals_aggregate"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "openevals",
          "metric": "openevals_aggregate",
          "value": 5.24,
          "normalized_0_100": 5.24,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "openevals",
          "metric": "swe_pro",
          "value": 5.24,
          "normalized_0_100": 5.24,
          "scale": "0-100",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/OpenEvals/leaderboard-data",
          "source_dataset": "OpenEvals/leaderboard-data",
          "source_config": "default",
          "source_split": "train",
          "source_row_count": 105,
          "captured_at": "2026-05-28T11:57:28.766Z",
          "evidence_note": "External model quality prior from OpenEvals; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "command-r-03-2024",
      "display_name": "command-r-03-2024",
      "provider": null,
      "aliases": [
        "command-r-03-2024"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 5,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 5,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.05,
          "normalized_0_100": 5,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "deepseek-v2-lite-chat",
      "display_name": "DeepSeek-V2-Lite-Chat",
      "provider": null,
      "aliases": [
        "DeepSeek-V2-Lite-Chat"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 4.96,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 4.96,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.02381,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.049603,
          "normalized_0_100": 4.96,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.047619,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-0-5b-instruct",
      "display_name": "Qwen2-0.5B-Instruct",
      "provider": null,
      "aliases": [
        "Qwen2-0.5B-Instruct"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 4.17,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 4.17,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.041667,
          "normalized_0_100": 4.17,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 0.25,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "mistral-large",
      "display_name": "mistral-large",
      "provider": null,
      "aliases": [
        "mistral-large"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 1.48,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 1.48,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.014833,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.014833,
          "normalized_0_100": 1.48,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.022249,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "qwen2-5-7b-instruct-turbo",
      "display_name": "qwen2.5-7b-instruct-turbo",
      "provider": null,
      "aliases": [
        "Qwen2.5-7B-Instruct-Turbo",
        "qwen2.5-7b-instruct-turbo"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 1.21,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 1.21,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0.6,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_instruction_following_mean_judgment",
          "value": 0.65,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.012121,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_category_language_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.012121,
          "normalized_0_100": 1.21,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0.6,
          "normalized_0_100": 60,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_connections_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "connections",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_lcb_generation_mean_judgment",
          "value": 0.5,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "LCB_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_paraphrase_mean_judgment",
          "value": 0.3,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "paraphrase",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_plot_unscrambling_mean_judgment",
          "value": 0.018182,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "plot_unscrambling",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_story_generation_mean_judgment",
          "value": 1,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "instruction_following",
          "task": "story_generation",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_typos_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "language",
          "task": "typos",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    },
    {
      "model_key": "perplexity-sonar-reasoning",
      "display_name": "perplexity-sonar-reasoning",
      "provider": null,
      "aliases": [
        "perplexity-sonar-reasoning"
      ],
      "openness": null,
      "license": null,
      "parameter_billions": null,
      "context_window": null,
      "modalities": [
        "text"
      ],
      "source_ids": [
        "livebench"
      ],
      "composite": {
        "score_0_100": 0,
        "confidence": "low",
        "components": [
          {
            "source_id": "livebench",
            "metric": "livebench_overall_mean_judgment",
            "normalized_0_100": 0,
            "weight": 0.2
          }
        ],
        "derived_from": [
          "livebench:livebench_overall_mean_judgment"
        ],
        "limitations": [
          "This is an external model-intelligence prior, not a RunLocalAI local speed or fit measurement.",
          "Provider papers, human-preference ratings, and LiveBench judgments are not interchangeable with quantized local execution.",
          "Use this score only beside Will-It-Run fit, quantization, context, runtime, and measured tok/s evidence."
        ]
      },
      "scores": [
        {
          "source_id": "livebench",
          "metric": "livebench_category_coding_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench category aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_overall_mean_judgment",
          "value": 0,
          "normalized_0_100": 0,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench judgment aggregate; not a RunLocalAI local execution measurement."
        },
        {
          "source_id": "livebench",
          "metric": "livebench_task_coding_completion_mean_judgment",
          "value": 0,
          "normalized_0_100": null,
          "scale": "mean-judgment-score",
          "confidence": "sourced",
          "source_url": "https://huggingface.co/datasets/livebench/model_judgment",
          "source_dataset": "livebench/model_judgment",
          "source_config": "default",
          "source_split": "leaderboard",
          "source_row_count": 60372,
          "category": "coding",
          "task": "coding_completion",
          "captured_at": "2026-05-28T11:57:53.182Z",
          "evidence_note": "External LiveBench task aggregate; not a RunLocalAI local execution measurement."
        }
      ]
    }
  ],
  "meta": {
    "model_count": 565,
    "score_count": 2759,
    "source_count": 3,
    "confidence_counts": {
      "high": 0,
      "moderate": 64,
      "low": 501
    },
    "generated_from": [
      "OpenEvals/leaderboard-data:default:train",
      "lmarena-ai/leaderboard-dataset:text:latest",
      "livebench/model_judgment:default:leaderboard"
    ]
  }
}
