{
  "evidenceVersion": "local-ollama-speed-v1",
  "capturedAt": "2026-05-28T04:17:04.717Z",
  "hardwareSlug": "rtx-5080",
  "modelTag": "alibayram/Trendyol-LLM-Asure-12B:latest",
  "catalogSlug": "trendyol-llm-asure-12b",
  "quant": "Q4_K_M",
  "contextSize": 4096,
  "numPredict": 256,
  "prompt": "Write a detailed explanation of how transformer attention works.",
  "scenario": "single-stream",
  "runtime": "ollama-api",
  "command": "POST http://localhost:11434/api/generate model=alibayram/Trendyol-LLM-Asure-12B:latest temperature=0 top_p=1 seed=42 num_ctx=4096 num_predict=256",
  "env": {
    "os": "Microsoft Windows [Version 10.0.26200.8457]",
    "platform": "win32",
    "arch": "x64",
    "cpu": "AMD Ryzen 7 5800X3D 8-Core Processor           ",
    "cpuThreads": 16,
    "ramGb": 31.9,
    "gpu": {
      "name": "NVIDIA GeForce RTX 5080",
      "driver": "595.97",
      "memoryTotalMb": 16303,
      "memoryUsedMb": 9937,
      "tempC": 43,
      "smClockMhz": 1875,
      "powerDrawW": 39.21
    },
    "ollamaVersionCli": null,
    "ollamaApiVersion": {
      "version": "0.24.0"
    }
  },
  "ollamaTagDetails": {
    "name": "alibayram/Trendyol-LLM-Asure-12B:latest",
    "model": "alibayram/Trendyol-LLM-Asure-12B:latest",
    "modified_at": "2026-05-27T03:11:43.0011017-07:00",
    "size": 7300778095,
    "digest": "592bfbc20577bffb7e887edacfb343027d283cd5bfdc79cada68c4b0f284ed31",
    "details": {
      "parent_model": "",
      "format": "gguf",
      "family": "gemma3",
      "families": [
        "gemma3"
      ],
      "parameter_size": "11.8B",
      "quantization_level": "unknown"
    }
  },
  "notes": "Ollama manifest reports quantization as unknown; prior operator row and size indicate Q4_K_M.",
  "logHashSha256": "f1021f5e22bf473a2746b3a3b074d4a4aacb39622599275b4ac2b67a28c08d0c",
  "runsCaptured": 5,
  "decodeStatsAllRuns": {
    "min": 81.65266265027,
    "max": 82.39395905515886,
    "median": 81.94906372714522,
    "p5": 81.65383365373636,
    "p95": 82.33322380991153,
    "mean": 81.94889718581962,
    "n": 5
  },
  "decodeStatsSteadyRuns": {
    "min": 81.6585176676018,
    "max": 82.39395905515886,
    "median": 82.01967327803371,
    "p5": 81.70209957653331,
    "p95": 82.34840762122336,
    "mean": 82.02295581970702,
    "n": 4
  },
  "runs": [
    {
      "runIndex": 1,
      "startedAt": "2026-05-28T04:16:44.052Z",
      "finishedAt": "2026-05-28T04:16:50.792Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run1.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run1.log",
      "decodeTokPerSec": 81.65266265027,
      "prefillTokPerSec": 481.7639660838168,
      "totalMs": 6669.3014,
      "loadMs": 3334.4182,
      "promptEvalMs": 39.4384,
      "evalMs": 3135.2315,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "4a5830d10fed7c80a5761c0e174695003cb1e17974ebaa8d1dc832650a5d7b7b",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 9785,
        "tempC": 43,
        "smClockMhz": 1875,
        "powerDrawW": 39.21
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10051,
        "tempC": 57,
        "smClockMhz": 2872,
        "powerDrawW": 266.76
      }
    },
    {
      "runIndex": 2,
      "startedAt": "2026-05-28T04:16:50.829Z",
      "finishedAt": "2026-05-28T04:16:54.268Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run2.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run2.log",
      "decodeTokPerSec": 82.09028282892221,
      "prefillTokPerSec": 1052.9699293955953,
      "totalMs": 3369.7111,
      "loadMs": 118.5942,
      "promptEvalMs": 18.0442,
      "evalMs": 3118.5177,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9d3e703adc7132a265402495ba699baeb78c11f67a79d2cdafcb32e516f4c4c7",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10051,
        "tempC": 57,
        "smClockMhz": 2872,
        "powerDrawW": 266.76
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10047,
        "tempC": 58,
        "smClockMhz": 2872,
        "powerDrawW": 271.06
      }
    },
    {
      "runIndex": 3,
      "startedAt": "2026-05-28T04:16:54.305Z",
      "finishedAt": "2026-05-28T04:16:57.752Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run3.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run3.log",
      "decodeTokPerSec": 81.6585176676018,
      "prefillTokPerSec": 1014.355803983749,
      "totalMs": 3381.2439,
      "loadMs": 117.2494,
      "promptEvalMs": 18.7311,
      "evalMs": 3135.0067,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9d3e703adc7132a265402495ba699baeb78c11f67a79d2cdafcb32e516f4c4c7",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10047,
        "tempC": 57,
        "smClockMhz": 2872,
        "powerDrawW": 267.72
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10047,
        "tempC": 59,
        "smClockMhz": 2872,
        "powerDrawW": 270.81
      }
    },
    {
      "runIndex": 4,
      "startedAt": "2026-05-28T04:16:57.791Z",
      "finishedAt": "2026-05-28T04:17:01.216Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run4.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run4.log",
      "decodeTokPerSec": 82.39395905515886,
      "prefillTokPerSec": 1009.4033894703289,
      "totalMs": 3357.0079,
      "loadMs": 116.8016,
      "promptEvalMs": 18.823,
      "evalMs": 3107.0239,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9d3e703adc7132a265402495ba699baeb78c11f67a79d2cdafcb32e516f4c4c7",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10047,
        "tempC": 56,
        "smClockMhz": 2872,
        "powerDrawW": 266.87
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10051,
        "tempC": 60,
        "smClockMhz": 2872,
        "powerDrawW": 273.77
      }
    },
    {
      "runIndex": 5,
      "startedAt": "2026-05-28T04:17:01.254Z",
      "finishedAt": "2026-05-28T04:17:04.717Z",
      "rawPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run5.ollama.json",
      "logPath": "public/benchmarks/evidence/local-ollama-rtx-5080-2026-05-28/alibayram-trendyol-llm-asure-12b-latest/run5.log",
      "decodeTokPerSec": 81.94906372714522,
      "prefillTokPerSec": 988.5433033995484,
      "totalMs": 3388.4129,
      "loadMs": 117.744,
      "promptEvalMs": 19.2202,
      "evalMs": 3123.8917,
      "promptTokens": 19,
      "responseTokens": 256,
      "responseHashSha256": "9d3e703adc7132a265402495ba699baeb78c11f67a79d2cdafcb32e516f4c4c7",
      "beforeGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10051,
        "tempC": 60,
        "smClockMhz": 2872,
        "powerDrawW": 273.77
      },
      "afterGpu": {
        "name": "NVIDIA GeForce RTX 5080",
        "driver": "595.97",
        "memoryTotalMb": 16303,
        "memoryUsedMb": 10117,
        "tempC": 61,
        "smClockMhz": 2857,
        "powerDrawW": 271.93
      }
    }
  ],
  "errors": [],
  "uploadEligible": true
}