{
  "evidenceVersion": "local-ollama-speed-v1",
  "capturedAt": "2026-05-28T04:43:19.112Z",
  "hardwareSlug": "rtx-5080",
  "modelTag": "qwen2.5-coder:14b",
  "catalogSlug": "qwen-2.5-coder-14b-instruct",
  "quant": "Q4_K_M",
  "contextSize": 4096,
  "numPredict": 256,
  "prompt": "Write a detailed explanation of how transformer attention works.",
  "scenario": "single-stream",
  "runtime": "ollama-api",
  "command": "POST http://localhost:11434/api/generate model=qwen2.5-coder:14b temperature=0 top_p=1 seed=42 num_ctx=4096 num_predict=256",
  "env": {
    "os": "Microsoft Windows [Version 10.0.26200.8457]",
    "platform": "win32",
    "arch": "x64",
    "cpu": "AMD Ryzen 7 5800X3D 8-Core Processor           ",
    "cpuThreads": 16,
    "ramGb": 31.9,
    "gpu": {
      "name": "NVIDIA GeForce RTX 5080",
      "driver": "595.97",
      "memoryTotalMb": 16303,
      "memoryUsedMb": 9937,
      "tempC": 43,
      "smClockMhz": 1875,
      "powerDrawW": 39.21
    },
    "ollamaVersionCli": null,
    "ollamaApiVersion": {
      "version": "0.24.0"
    }
  },
  "ollamaTagDetails": {
    "name": "qwen2.5-coder:14b",
    "model": "qwen2.5-coder:14b",
    "modified_at": "2026-04-01T04:00:36.0733043-07:00",
    "size": 8988124298,
    "digest": "9ec8897f747e246e970bc5cfdda85d22f1123dc2e3d34978a010a75968716849",
    "details": {
      "parent_model": "",
      "format": "gguf",
      "family": "qwen2",
      "families": [
        "qwen2"
      ],
      "parameter_size": "14.8B",
      "quantization_level": "Q4_K_M"
    }
  },
  "notes": null,
  "logHashSha256": "9d89179618d473a1ee4815ea21d6ef6adc8370280e7b8453b6897ee05ee16837",
  "runsCaptured": 5,
  "decodeStatsAllRuns": {
    "min": 77.3996803393202,
    "max": 79.10844290419207,
    "median": 78.97568048808945,
    "p5": 77.60836726814617,
    "p95": 79.08941904151884,
    "mean": 78.58804846117553,
    "n": 5
  },
  "decodeStatsSteadyRuns": {
    "min": 78.44311498345003,
    "max": 79.10844290419207,
    "median": 78.99450203945769,
    "p5": 78.52299980914594,
    "p95": 79.09417500718715,
    "mean": 78.88514049163938,
    "n": 4
  },
  "runs": [
    {
      "runIndex": 1,
      "startedAt": "2026-05-28T04:42:37.657Z",
      "finishedAt": "2026-05-28T04:42:53.220Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run1.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run1.log",
      "decodeTokPerSec": 77.3996803393202,
      "prefillTokPerSec": 1102.3831261889486,
      "totalMs": 15517.381,
      "loadMs": 11913.504,
      "promptEvalMs": 35.3779,
      "evalMs": 3307.5072,
      "promptTokens": 39,
      "responseTokens": 256,
      "responseHashSha256": "0395f0ffd6fbd06be95704e2288d647d22178305d99198e0a033bf504b2414c9",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 1598,
        "tempC": 54,
        "smClockMhz": 2872,
        "powerDrawW": 214.17
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11457,
        "tempC": 62,
        "smClockMhz": 2857,
        "powerDrawW": 287.95
      }
    },
    {
      "runIndex": 2,
      "startedAt": "2026-05-28T04:42:53.268Z",
      "finishedAt": "2026-05-28T04:43:07.982Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run2.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run2.log",
      "decodeTokPerSec": 79.01332359082592,
      "prefillTokPerSec": 1291.2624573717842,
      "totalMs": 14667.0307,
      "loadMs": 11131.1457,
      "promptEvalMs": 30.203,
      "evalMs": 3239.9599,
      "promptTokens": 39,
      "responseTokens": 256,
      "responseHashSha256": "0395f0ffd6fbd06be95704e2288d647d22178305d99198e0a033bf504b2414c9",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11469,
        "tempC": 62,
        "smClockMhz": 2857,
        "powerDrawW": 287.95
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11190,
        "tempC": 63,
        "smClockMhz": 2857,
        "powerDrawW": 284.71
      }
    },
    {
      "runIndex": 3,
      "startedAt": "2026-05-28T04:43:08.019Z",
      "finishedAt": "2026-05-28T04:43:11.690Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run3.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run3.log",
      "decodeTokPerSec": 79.10844290419207,
      "prefillTokPerSec": 2517.152132802365,
      "totalMs": 3627.5258,
      "loadMs": 107.6652,
      "promptEvalMs": 15.4937,
      "evalMs": 3236.0642,
      "promptTokens": 39,
      "responseTokens": 256,
      "responseHashSha256": "0395f0ffd6fbd06be95704e2288d647d22178305d99198e0a033bf504b2414c9",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11190,
        "tempC": 63,
        "smClockMhz": 2857,
        "powerDrawW": 284.71
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11179,
        "tempC": 65,
        "smClockMhz": 2857,
        "powerDrawW": 289.14
      }
    },
    {
      "runIndex": 4,
      "startedAt": "2026-05-28T04:43:11.726Z",
      "finishedAt": "2026-05-28T04:43:15.397Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run4.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run4.log",
      "decodeTokPerSec": 78.97568048808945,
      "prefillTokPerSec": 2565.2662943741734,
      "totalMs": 3627.4724,
      "loadMs": 94.1237,
      "promptEvalMs": 15.2031,
      "evalMs": 3241.5042,
      "promptTokens": 39,
      "responseTokens": 256,
      "responseHashSha256": "0395f0ffd6fbd06be95704e2288d647d22178305d99198e0a033bf504b2414c9",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11179,
        "tempC": 65,
        "smClockMhz": 2857,
        "powerDrawW": 289.14
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11192,
        "tempC": 65,
        "smClockMhz": 2857,
        "powerDrawW": 289.48
      }
    },
    {
      "runIndex": 5,
      "startedAt": "2026-05-28T04:43:15.434Z",
      "finishedAt": "2026-05-28T04:43:19.111Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run5.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/qwen2-5-coder-14b/run5.log",
      "decodeTokPerSec": 78.44311498345003,
      "prefillTokPerSec": 2520.6988152715567,
      "totalMs": 3631.7968,
      "loadMs": 94.4552,
      "promptEvalMs": 15.4719,
      "evalMs": 3263.5114,
      "promptTokens": 39,
      "responseTokens": 256,
      "responseHashSha256": "0395f0ffd6fbd06be95704e2288d647d22178305d99198e0a033bf504b2414c9",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11192,
        "tempC": 65,
        "smClockMhz": 2857,
        "powerDrawW": 289.48
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 11172,
        "tempC": 66,
        "smClockMhz": 2857,
        "powerDrawW": 289.99
      }
    }
  ],
  "errors": [],
  "uploadEligible": true
}