{
  "evidenceVersion": "local-ollama-speed-v1",
  "capturedAt": "2026-06-02T01:04:06.572Z",
  "hardwareSlug": "rtx-3080-16gb-mobile",
  "modelTag": "mistral-nemo:12b",
  "catalogSlug": "mistral-nemo-12b",
  "quant": "Q4_K_M",
  "contextSize": 4096,
  "numPredict": 256,
  "prompt": "Write a detailed explanation of how transformer attention works.",
  "scenario": "single-stream",
  "runtime": "ollama-api",
  "command": "POST http://localhost:11434/api/generate model=mistral-nemo:12b temperature=0 top_p=1 seed=42 num_ctx=4096 num_predict=256",
  "env": {
    "os": "Microsoft Windows [Version 10.0.26200.8457]",
    "platform": "win32",
    "arch": "x64",
    "cpu": "AMD Ryzen 9 5900HX with Radeon Graphics        ",
    "cpuThreads": 16,
    "ramGb": 31.9,
    "gpu": {
      "vendor": "nvidia",
      "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
      "driver": "571.96",
      "memoryTotalMb": 16384,
      "memoryUsedMb": 1830,
      "tempC": 53,
      "smClockMhz": 210,
      "powerDrawW": 11.6
    },
    "ollamaVersionCli": "ollama version is 0.24.0",
    "ollamaApiVersion": {
      "version": "0.24.0"
    }
  },
  "ollamaTagDetails": {
    "name": "mistral-nemo:12b",
    "model": "mistral-nemo:12b",
    "modified_at": "2026-06-02T04:03:30.3171168+03:00",
    "size": 7071713227,
    "digest": "e7e06d107c6c86ed0cf45445f1790720b5092149c4c95f4d965844e9afbfdc89",
    "details": {
      "parent_model": "",
      "format": "gguf",
      "family": "llama",
      "families": [
        "llama"
      ],
      "parameter_size": "12.2B",
      "quantization_level": "Q4_0"
    }
  },
  "notes": null,
  "logHashSha256": "1ae2a3aa9abb0fd0463b76c50504900c69ba09a5d7356e4acf86827c543f3d76",
  "runsCaptured": 5,
  "decodeStatsAllRuns": {
    "min": 65.21729881717984,
    "max": 66.1040079428097,
    "median": 65.73375574461029,
    "p5": 65.31462935267862,
    "p95": 66.1035634732758,
    "mean": 65.77215991888275,
    "n": 5
  },
  "decodeStatsSteadyRuns": {
    "min": 65.21729881717984,
    "max": 66.1017855951402,
    "median": 65.71885361964203,
    "p5": 65.29029671880393,
    "p95": 66.04658111756072,
    "mean": 65.68919791290102,
    "n": 4
  },
  "runs": [
    {
      "runIndex": 1,
      "startedAt": "2026-06-02T01:03:34.628Z",
      "finishedAt": "2026-06-02T01:03:44.195Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run1.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run1.log",
      "decodeTokPerSec": 66.1040079428097,
      "prefillTokPerSec": 329.1764259036526,
      "totalMs": 9475.0931,
      "loadMs": 4381.5359,
      "promptEvalMs": 39.4925,
      "evalMs": 3872.685,
      "promptTokens": 13,
      "responseTokens": 256,
      "responseHashSha256": "7d1c3bd8a49371bc157bcc48efbaec79744862e252beb254026643f0b777c664",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 1830,
        "tempC": 53,
        "smClockMhz": 210,
        "powerDrawW": 11.6
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9411,
        "tempC": 60,
        "smClockMhz": 1335,
        "powerDrawW": 114.82
      }
    },
    {
      "runIndex": 2,
      "startedAt": "2026-06-02T01:03:44.270Z",
      "finishedAt": "2026-06-02T01:03:49.782Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run2.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run2.log",
      "decodeTokPerSec": 65.21729881717984,
      "prefillTokPerSec": 616.0553502037722,
      "totalMs": 5415.9074,
      "loadMs": 342.7836,
      "promptEvalMs": 21.102,
      "evalMs": 3925.3389,
      "promptTokens": 13,
      "responseTokens": 256,
      "responseHashSha256": "9192500c364148e2f1535b58a0121462f4ae94adead41a6cbb079dd8532299db",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9411,
        "tempC": 60,
        "smClockMhz": 1335,
        "powerDrawW": 108.43
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9413,
        "tempC": 62,
        "smClockMhz": 1320,
        "powerDrawW": 114.49
      }
    },
    {
      "runIndex": 3,
      "startedAt": "2026-06-02T01:03:49.848Z",
      "finishedAt": "2026-06-02T01:03:55.428Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run3.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run3.log",
      "decodeTokPerSec": 65.73375574461029,
      "prefillTokPerSec": 666.9676571991463,
      "totalMs": 5499.38,
      "loadMs": 429.1527,
      "promptEvalMs": 19.4912,
      "evalMs": 3894.4983,
      "promptTokens": 13,
      "responseTokens": 256,
      "responseHashSha256": "9192500c364148e2f1535b58a0121462f4ae94adead41a6cbb079dd8532299db",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9413,
        "tempC": 62,
        "smClockMhz": 1320,
        "powerDrawW": 107.5
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9413,
        "tempC": 63,
        "smClockMhz": 1320,
        "powerDrawW": 111.94
      }
    },
    {
      "runIndex": 4,
      "startedAt": "2026-06-02T01:03:55.504Z",
      "finishedAt": "2026-06-02T01:04:01.010Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run4.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run4.log",
      "decodeTokPerSec": 65.70395149467379,
      "prefillTokPerSec": 823.0296353978714,
      "totalMs": 5420.2822,
      "loadMs": 354.4345,
      "promptEvalMs": 15.7953,
      "evalMs": 3896.2649,
      "promptTokens": 13,
      "responseTokens": 256,
      "responseHashSha256": "9192500c364148e2f1535b58a0121462f4ae94adead41a6cbb079dd8532299db",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9413,
        "tempC": 62,
        "smClockMhz": 1320,
        "powerDrawW": 105.18
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9413,
        "tempC": 65,
        "smClockMhz": 1305,
        "powerDrawW": 110.63
      }
    },
    {
      "runIndex": 5,
      "startedAt": "2026-06-02T01:04:01.082Z",
      "finishedAt": "2026-06-02T01:04:06.570Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run5.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-02/mistral-nemo-12b/run5.log",
      "decodeTokPerSec": 66.1017855951402,
      "prefillTokPerSec": 812.7336609275167,
      "totalMs": 5402.8872,
      "loadMs": 334.3375,
      "promptEvalMs": 15.9954,
      "evalMs": 3872.8152,
      "promptTokens": 13,
      "responseTokens": 256,
      "responseHashSha256": "9192500c364148e2f1535b58a0121462f4ae94adead41a6cbb079dd8532299db",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9413,
        "tempC": 65,
        "smClockMhz": 1305,
        "powerDrawW": 110.63
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 9413,
        "tempC": 66,
        "smClockMhz": 1335,
        "powerDrawW": 114.68
      }
    }
  ],
  "errors": [],
  "uploadEligible": true
}