Persist Bench Leaderboard

{
  • "headers": [
    • "Rank",
    • "Type",
    • "Model",
    • "Cross-Domain FR",
    • "Sycophancy FR",
    • "Beneficial FR",
    • "Normalized Failure Score"
    ],
  • "data": [
    • [
      • 1,
      • "Proprietary",
      • "Claude-Opus-4.5",
      • 18,
      • 87.5,
      • 2,
      • 0.0259
      ],
    • [
      • 2,
      • "Proprietary",
      • "GPT-5.2 (High)",
      • 4,
      • 59,
      • 23,
      • 0.0333
      ],
    • [
      • 3,
      • "Proprietary",
      • "Claude-Sonnet-4.5",
      • 40,
      • 83.5,
      • 11,
      • 0.0412
      ],
    • [
      • 4,
      • "Proprietary",
      • "Gemini-3-Pro",
      • 59.5,
      • 100,
      • 4,
      • 0.0457
      ],
    • [
      • 5,
      • "Open Weights",
      • "DeepSeek-V3.2-Speciale",
      • 38,
      • 98.5,
      • 14,
      • 0.0461
      ],
    • [
      • 6,
      • "Proprietary",
      • "Grok-4.1-Fast",
      • 56.5,
      • 99.5,
      • 6,
      • 0.0462
      ],
    • [
      • 7,
      • "Open Weights",
      • "Kimi-K2-Thinking",
      • 49.5,
      • 97,
      • 10,
      • 0.0466
      ],
    • [
      • 8,
      • "Proprietary",
      • "Gemini-3-Flash",
      • 79.5,
      • 99,
      • 4,
      • 0.053
      ],
    • [
      • 9,
      • "Open Weights",
      • "GLM-4.7",
      • 47,
      • 99,
      • 21,
      • 0.0557
      ],
    • [
      • 10,
      • "Proprietary",
      • "Grok-4",
      • 86,
      • 100,
      • 5,
      • 0.0565
      ],
    • [
      • 11,
      • "Open Weights",
      • "MiniMax-M2.1",
      • 33.5,
      • 94,
      • 30,
      • 0.0575
      ],
    • [
      • 12,
      • "Open Weights",
      • "GPT-OSS-120B",
      • 59.5,
      • 96.5,
      • 20,
      • 0.059
      ],
    • [
      • 13,
      • "Open Weights",
      • "Qwen3-235B-A22B-Think",
      • 91,
      • 100,
      • 12,
      • 0.0645
      ],
    • [
      • 14,
      • "Open Weights",
      • "Qwen3-235B-A22B",
      • 76,
      • 99.5,
      • 19,
      • 0.0649
      ],
    • [
      • 15,
      • "Proprietary",
      • "GPT-4o",
      • 13,
      • 88,
      • 53,
      • 0.0687
      ],
    • [
      • 16,
      • "Open Weights",
      • "Llama-3.3-70B-Instruct",
      • 17.5,
      • 82,
      • 55,
      • 0.071
      ],
    • [
      • 17,
      • "Open Weights",
      • "Kimi-K2-0905",
      • 61.5,
      • 99.5,
      • 33,
      • 0.0717
      ],
    • [
      • 18,
      • "Open Weights",
      • "Llama-4-Maverick",
      • 59,
      • 96,
      • 59,
      • 0.0928
      ]
    ],
  • "metadata": null
}