{
  "evidenceVersion": "local-ollama-speed-v1",
  "capturedAt": "2026-05-28T04:17:19.929Z",
  "hardwareSlug": "rtx-5080",
  "modelTag": "llama3.1:8b",
  "catalogSlug": "llama-3.1-8b-instruct",
  "quant": "Q4_K_M",
  "contextSize": 4096,
  "numPredict": 256,
  "prompt": "Write a detailed explanation of how transformer attention works.",
  "scenario": "single-stream",
  "runtime": "ollama-api",
  "command": "POST http://localhost:11434/api/generate model=llama3.1:8b temperature=0 top_p=1 seed=42 num_ctx=4096 num_predict=256",
  "env": {
    "os": "Microsoft Windows [Version 10.0.26200.8457]",
    "platform": "win32",
    "arch": "x64",
    "cpu": "AMD Ryzen 7 5800X3D 8-Core Processor           ",
    "cpuThreads": 16,
    "ramGb": 31.9,
    "gpu": {
      "name": "NVIDIA GeForce RTX 5080",
      "driver": "595.97",
      "memoryTotalMb": 16303,
      "memoryUsedMb": 9937,
      "tempC": 43,
      "smClockMhz": 1875,
      "powerDrawW": 39.21
    },
    "ollamaVersionCli": null,
    "ollamaApiVersion": {
      "version": "0.24.0"
    }
  },
  "ollamaTagDetails": {
    "name": "llama3.1:8b",
    "model": "llama3.1:8b",
    "modified_at": "2026-05-11T13:31:39.5974226-07:00",
    "size": 4920753328,
    "digest": "46e0c10c039e019119339687c3c1757cc81b9da49709a3b3924863ba87ca666e",
    "details": {
      "parent_model": "",
      "format": "gguf",
      "family": "llama",
      "families": [
        "llama"
      ],
      "parameter_size": "8.0B",
      "quantization_level": "Q4_K_M"
    }
  },
  "notes": null,
  "logHashSha256": "908b1e87e7b89d2e99d4d7558fdca446bd1bacf36049800125c30de6bda031a2",
  "runsCaptured": 5,
  "decodeStatsAllRuns": {
    "min": 134.53704043841836,
    "max": 137.1595710162967,
    "median": 136.52128305474903,
    "p5": 134.54909379115617,
    "p95": 137.05557950234407,
    "mean": 135.890963031621,
    "n": 5
  },
  "decodeStatsSteadyRuns": {
    "min": 134.53704043841836,
    "max": 137.1595710162967,
    "median": 135.61846032432047,
    "p5": 134.54608045297172,
    "p95": 137.08157738083221,
    "mean": 135.733383025839,
    "n": 4
  },
  "runs": [
    {
      "runIndex": 1,
      "startedAt": "2026-05-28T04:17:04.767Z",
      "finishedAt": "2026-05-28T04:17:11.047Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run1.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run1.log",
      "decodeTokPerSec": 136.52128305474903,
      "prefillTokPerSec": 1170.7271386258005,
      "totalMs": 6236.2311,
      "loadMs": 4231.3617,
      "promptEvalMs": 17.0834,
      "evalMs": 1875.1655,
      "promptTokens": 20,
      "responseTokens": 256,
      "responseHashSha256": "b2329ae2571cb519c45b2ef877b56fc640e8c3577bb54e77f844bebd03b281b5",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10117,
        "tempC": 61,
        "smClockMhz": 2857,
        "powerDrawW": 271.93
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7372,
        "tempC": 61,
        "smClockMhz": 2857,
        "powerDrawW": 277.21
      }
    },
    {
      "runIndex": 2,
      "startedAt": "2026-05-28T04:17:11.085Z",
      "finishedAt": "2026-05-28T04:17:13.246Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run2.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run2.log",
      "decodeTokPerSec": 134.59730720210737,
      "prefillTokPerSec": 2139.197586985122,
      "totalMs": 2117.8669,
      "loadMs": 113.5568,
      "promptEvalMs": 9.3493,
      "evalMs": 1901.9697,
      "promptTokens": 20,
      "responseTokens": 256,
      "responseHashSha256": "b2329ae2571cb519c45b2ef877b56fc640e8c3577bb54e77f844bebd03b281b5",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7372,
        "tempC": 61,
        "smClockMhz": 2857,
        "powerDrawW": 277.21
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7379,
        "tempC": 62,
        "smClockMhz": 2857,
        "powerDrawW": 276.8
      }
    },
    {
      "runIndex": 3,
      "startedAt": "2026-05-28T04:17:13.282Z",
      "finishedAt": "2026-05-28T04:17:15.472Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run3.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run3.log",
      "decodeTokPerSec": 134.53704043841836,
      "prefillTokPerSec": 2128.1350088849636,
      "totalMs": 2138.8779,
      "loadMs": 112.5922,
      "promptEvalMs": 9.3979,
      "evalMs": 1902.8217,
      "promptTokens": 20,
      "responseTokens": 256,
      "responseHashSha256": "b2329ae2571cb519c45b2ef877b56fc640e8c3577bb54e77f844bebd03b281b5",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7379,
        "tempC": 58,
        "smClockMhz": 2857,
        "powerDrawW": 273.2
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7402,
        "tempC": 62,
        "smClockMhz": 2857,
        "powerDrawW": 275.8
      }
    },
    {
      "runIndex": 4,
      "startedAt": "2026-05-28T04:17:15.519Z",
      "finishedAt": "2026-05-28T04:17:17.717Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run4.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run4.log",
      "decodeTokPerSec": 136.63961344653356,
      "prefillTokPerSec": 2119.0930281839373,
      "totalMs": 2148.4459,
      "loadMs": 130.5208,
      "promptEvalMs": 9.438,
      "evalMs": 1873.5416,
      "promptTokens": 20,
      "responseTokens": 256,
      "responseHashSha256": "b2329ae2571cb519c45b2ef877b56fc640e8c3577bb54e77f844bebd03b281b5",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7403,
        "tempC": 62,
        "smClockMhz": 2857,
        "powerDrawW": 275.8
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7384,
        "tempC": 63,
        "smClockMhz": 2857,
        "powerDrawW": 276.18
      }
    },
    {
      "runIndex": 5,
      "startedAt": "2026-05-28T04:17:17.757Z",
      "finishedAt": "2026-05-28T04:17:19.928Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run5.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/llama3-1-8b/run5.log",
      "decodeTokPerSec": 137.1595710162967,
      "prefillTokPerSec": 2142.8647959457,
      "totalMs": 2128.1175,
      "loadMs": 127.1029,
      "promptEvalMs": 9.3333,
      "evalMs": 1866.4392,
      "promptTokens": 20,
      "responseTokens": 256,
      "responseHashSha256": "b2329ae2571cb519c45b2ef877b56fc640e8c3577bb54e77f844bebd03b281b5",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7384,
        "tempC": 63,
        "smClockMhz": 2857,
        "powerDrawW": 276.18
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 7381,
        "tempC": 64,
        "smClockMhz": 2857,
        "powerDrawW": 279.42
      }
    }
  ],
  "errors": [],
  "uploadEligible": true
}