{
  "evidenceVersion": "local-ollama-speed-v1",
  "capturedAt": "2026-06-01T22:33:39.141Z",
  "hardwareSlug": "rtx-3080-16gb-mobile",
  "modelTag": "hermes3:8b",
  "catalogSlug": "hermes-3-llama-3.1-8b",
  "quant": "Q4_K_M",
  "contextSize": 4096,
  "numPredict": 256,
  "prompt": "Write a detailed explanation of how transformer attention works.",
  "scenario": "single-stream",
  "runtime": "ollama-api",
  "command": "POST http://localhost:11434/api/generate model=hermes3:8b temperature=0 top_p=1 seed=42 num_ctx=4096 num_predict=256",
  "env": {
    "os": "Microsoft Windows [Version 10.0.26200.8457]",
    "platform": "win32",
    "arch": "x64",
    "cpu": "AMD Ryzen 9 5900HX with Radeon Graphics        ",
    "cpuThreads": 16,
    "ramGb": 31.9,
    "gpu": {
      "vendor": "nvidia",
      "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
      "driver": "571.96",
      "memoryTotalMb": 16384,
      "memoryUsedMb": 1872,
      "tempC": 55,
      "smClockMhz": 210,
      "powerDrawW": 17.06
    },
    "ollamaVersionCli": "ollama version is 0.24.0",
    "ollamaApiVersion": {
      "version": "0.24.0"
    }
  },
  "ollamaTagDetails": {
    "name": "hermes3:8b",
    "model": "hermes3:8b",
    "modified_at": "2026-06-02T01:33:11.105474+03:00",
    "size": 4661227243,
    "digest": "4f6b83f30b62bc3d0cf9be09266db222805ee815c8fd7d8b38f863f655be78b7",
    "details": {
      "parent_model": "",
      "format": "gguf",
      "family": "llama",
      "families": [
        "llama"
      ],
      "parameter_size": "8.0B",
      "quantization_level": "Q4_0"
    }
  },
  "notes": null,
  "logHashSha256": "0d7897210037f2b5b2fbcb7b707ebdacfb2e075820afbf756455e15499073cef",
  "runsCaptured": 5,
  "decodeStatsAllRuns": {
    "min": 81.24887132305219,
    "max": 82.22500873640718,
    "median": 81.71858265247283,
    "p5": 81.25417685024719,
    "p95": 82.14587517829548,
    "mean": 81.65944052336162,
    "n": 5
  },
  "decodeStatsSteadyRuns": {
    "min": 81.24887132305219,
    "max": 81.82934094584873,
    "median": 81.49699080575003,
    "p5": 81.25285046844844,
    "p95": 81.81272720184235,
    "mean": 81.51804847010024,
    "n": 4
  },
  "runs": [
    {
      "runIndex": 1,
      "startedAt": "2026-06-01T22:33:15.387Z",
      "finishedAt": "2026-06-01T22:33:22.781Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run1.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run1.log",
      "decodeTokPerSec": 82.22500873640718,
      "prefillTokPerSec": 561.8741756714396,
      "totalMs": 7305.4599,
      "loadMs": 3709.6414,
      "promptEvalMs": 33.8154,
      "evalMs": 3113.408,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "f9bd077ffd878d0d21a0f0092a59bcd7c9e125975ad8a708f7e390017afd4c32",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 1872,
        "tempC": 55,
        "smClockMhz": 210,
        "powerDrawW": 17.06
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 62,
        "smClockMhz": 1410,
        "powerDrawW": 112.57
      }
    },
    {
      "runIndex": 2,
      "startedAt": "2026-06-01T22:33:22.857Z",
      "finishedAt": "2026-06-01T22:33:26.857Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run2.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run2.log",
      "decodeTokPerSec": 81.24887132305219,
      "prefillTokPerSec": 1230.633711591274,
      "totalMs": 3909.4693,
      "loadMs": 348.2033,
      "promptEvalMs": 15.4392,
      "evalMs": 3150.813,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9ed0c6361cfbc374c617a51342372e77bf16f589471f4004bb924f2701c2b2b0",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 62,
        "smClockMhz": 1770,
        "powerDrawW": 105.7
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 63,
        "smClockMhz": 1725,
        "powerDrawW": 111.03
      }
    },
    {
      "runIndex": 3,
      "startedAt": "2026-06-01T22:33:26.934Z",
      "finishedAt": "2026-06-01T22:33:31.059Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run3.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run3.log",
      "decodeTokPerSec": 81.27539895902723,
      "prefillTokPerSec": 1190.2897415818325,
      "totalMs": 4041.4635,
      "loadMs": 424.2516,
      "promptEvalMs": 15.9625,
      "evalMs": 3149.7846,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9ed0c6361cfbc374c617a51342372e77bf16f589471f4004bb924f2701c2b2b0",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 63,
        "smClockMhz": 1725,
        "powerDrawW": 111.03
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 64,
        "smClockMhz": 1440,
        "powerDrawW": 111.57
      }
    },
    {
      "runIndex": 4,
      "startedAt": "2026-06-01T22:33:31.138Z",
      "finishedAt": "2026-06-01T22:33:35.091Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run4.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run4.log",
      "decodeTokPerSec": 81.71858265247283,
      "prefillTokPerSec": 1306.605233297803,
      "totalMs": 3864.8574,
      "loadMs": 324.2033,
      "promptEvalMs": 14.5415,
      "evalMs": 3132.7024,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9ed0c6361cfbc374c617a51342372e77bf16f589471f4004bb924f2701c2b2b0",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 62,
        "smClockMhz": 1440,
        "powerDrawW": 104.8
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 65,
        "smClockMhz": 1380,
        "powerDrawW": 113.64
      }
    },
    {
      "runIndex": 5,
      "startedAt": "2026-06-01T22:33:35.168Z",
      "finishedAt": "2026-06-01T22:33:39.139Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run5.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-3080-16gb-mobile-2026-06-01/hermes3-8b/run5.log",
      "decodeTokPerSec": 81.82934094584873,
      "prefillTokPerSec": 1284.9298022560663,
      "totalMs": 3886.7771,
      "loadMs": 336.5254,
      "promptEvalMs": 14.7868,
      "evalMs": 3128.4622,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9ed0c6361cfbc374c617a51342372e77bf16f589471f4004bb924f2701c2b2b0",
      "beforeGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 63,
        "smClockMhz": 1380,
        "powerDrawW": 106.88
      },
      "afterGpu": {
        "vendor": "nvidia",
        "name": "NVIDIA GeForce RTX 3080 Laptop GPU",
        "driver": "571.96",
        "memoryTotalMb": 16384,
        "memoryUsedMb": 7057,
        "tempC": 65,
        "smClockMhz": 1410,
        "powerDrawW": 110.29
      }
    }
  ],
  "errors": [],
  "uploadEligible": true
}