Roblox LLM Leaderboard
Tracks LLM capabilities in Roblox game development.
Benchmarks:
- RobloxQA: Multiple-choice question answering about Roblox APIs and concepts.
- RobloxQA_OpenEnded: Open-ended question answering about Roblox APIs and concepts, with no answer choices provided. Response correctness is judged by an ensemble of reasoning LLMs that compare the generated answer to the correct answer.
 
 { 
  - "headers": [ 
- "Model",
 - "Precision",
 - "Params (B)",
 - "Reasoning",
 - "Open Source",
 - "Average",
 - "RobloxQA",
 - "RobloxQA_OpenEnded"
 
 - "data": [ 
-  [ 
- "OpenAI/gpt-o4-mini (high)",
 - null,
 - null,
 - true,
 - false,
 - 63.705,
 - 81.854,
 - 45.557
 
 -  [ 
- "OpenAI/gpt-o4-mini (low)",
 - null,
 - null,
 - true,
 - false,
 - 62.925,
 - 81.377,
 - 44.473
 
 -  [ 
- "Google/Gemini-2.5-Pro-Preview-03-25",
 - null,
 - null,
 - true,
 - false,
 - 61.993,
 - 82.547,
 - 41.439
 
 -  [ 
- "deepseek-ai/DeepSeek-R1",
 - "FP8",
 - 684.531,
 - true,
 - true,
 - 61.841,
 - 81.68,
 - 42.003
 
 -  [ 
- "OpenAI/gpt-4.1",
 - null,
 - null,
 - false,
 - false,
 - 60.888,
 - 80.987,
 - 40.789
 
 -  [ 
- "OpenAI/gpt-o3-mini (high)",
 - null,
 - null,
 - true,
 - false,
 - 60.845,
 - 81.291,
 - 40.399
 
 -  [ 
- "Anthropic/Claude-3.7-Sonnet (Thinking)",
 - null,
 - null,
 - true,
 - false,
 - 60.303,
 - 80.858,
 - 39.749
 
 -  [ 
- "Anthropic/Claude-3.7-Sonnet",
 - null,
 - null,
 - false,
 - false,
 - 58.938,
 - 80.121,
 - 37.755
 
 -  [ 
- "OpenAI/gpt-o3-mini (low)",
 - null,
 - null,
 - true,
 - false,
 - 58.093,
 - 79.688,
 - 36.498
 
 -  [ 
- "Google/Gemini-2.5-Flash-Preview-04-17",
 - null,
 - null,
 - true,
 - false,
 - 57.876,
 - 80.641,
 - 35.111
 
 -  [ 
- "Qwen/Qwen3-235B-A22B",
 - "FP8",
 - 235.094,
 - true,
 - true,
 - 56.793,
 - 78.302,
 - 35.284
 
 -  [ 
- "OpenAI/gpt-4.1-mini",
 - null,
 - null,
 - false,
 - false,
 - 56.576,
 - 79.212,
 - 33.94
 
 -  [ 
- "OpenAI/gpt-4o",
 - null,
 - null,
 - false,
 - false,
 - 56.338,
 - 77.523,
 - 35.154
 
 -  [ 
- "Anthropic/Claude-3.5-Sonnet",
 - null,
 - null,
 - false,
 - false,
 - 56.274,
 - 76.96,
 - 35.587
 
 -  [ 
- "deepseek-ai/DeepSeek-V3",
 - "FP8",
 - 684.531,
 - false,
 - true,
 - 55.601,
 - 77.566,
 - 33.637
 
 -  [ 
- "Qwen/QwQ-32B",
 - "FP16",
 - 32.764,
 - true,
 - true,
 - 54.323,
 - 77.869,
 - 30.776
 
 -  [ 
- "Google/Gemini-2.0-Flash",
 - null,
 - null,
 - false,
 - false,
 - 53.976,
 - 76.7,
 - 31.253
 
 -  [ 
- "boatbomber/Gemma-3-27B-Roblox-Luau",
 - "Q4_K_M",
 - 27.4,
 - false,
 - true,
 - 52.33,
 - 74.838,
 - 29.822
 
 -  [ 
- "OpenAI/gpt-4.1-nano",
 - null,
 - null,
 - false,
 - false,
 - 51.723,
 - 75.227,
 - 28.218
 
 -  [ 
- "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
 - "FP16",
 - 70.554,
 - true,
 - true,
 - 50.747,
 - 75.574,
 - 25.921
 
 -  [ 
- "Google/Gemini-2.0-Flash-Lite",
 - null,
 - null,
 - false,
 - false,
 - 50.704,
 - 74.924,
 - 26.485
 
 -  [ 
- "OpenAI/gpt-4o-mini",
 - null,
 - null,
 - false,
 - false,
 - 50.228,
 - 74.015,
 - 26.441
 
 -  [ 
- "Qwen/Qwen2.5-72B-Instruct",
 - "FP8",
 - 72.706,
 - false,
 - true,
 - 49.707,
 - 75.314,
 - 24.101
 
 -  [ 
- "meta-llama/Llama-3.3-70B-Instruct",
 - "FP8",
 - 70.554,
 - false,
 - true,
 - 49.621,
 - 75.011,
 - 24.231
 
 -  [ 
- "microsoft/phi-4",
 - "Q6_K",
 - 14.66,
 - false,
 - true,
 - 49.317,
 - 74.924,
 - 23.71
 
 -  [ 
- "google/gemma-3-27b-it",
 - "Q4_K_M",
 - 27.432,
 - false,
 - true,
 - 49.123,
 - 72.239,
 - 26.008
 
 -  [ 
- "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
 - "Q6_K",
 - 24.011,
 - false,
 - true,
 - 48.646,
 - 73.668,
 - 23.624
 
 -  [ 
- "Qwen/Qwen2.5-Coder-32B-Instruct",
 - "FP16",
 - 32.764,
 - false,
 - true,
 - 48.473,
 - 73.105,
 - 23.84
 
 -  [ 
- "mistralai/Mistral-Small-24B-Instruct-2501",
 - "FP16",
 - 23.572,
 - false,
 - true,
 - 48.126,
 - 72.889,
 - 23.364
 
 -  [ 
- "boatbomber/R1-Distill-Qwen-14B-Roblox-Luau",
 - "Q6_K",
 - 14.77,
 - true,
 - true,
 - 46.847,
 - 74.968,
 - 18.726
 
 -  [ 
- "google/gemma-3-12b-it",
 - "Q4_K_M",
 - 12.187,
 - false,
 - true,
 - 46.112,
 - 70.03,
 - 22.193
 
 -  [ 
- "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
 - "FP8",
 - 14.77,
 - true,
 - true,
 - 45.894,
 - 72.456,
 - 19.332
 
 -  [ 
- "Qwen/Qwen2.5-7B-Instruct",
 - "FP8",
 - 7.616,
 - false,
 - true,
 - 42.602,
 - 67.692,
 - 17.512
 
 -  [ 
- "internlm/internlm2_5-20b-chat",
 - "Q4_K_M",
 - 19.861,
 - false,
 - true,
 - 40.869,
 - 64.184,
 - 17.555
 
 -  [ 
- "mistralai/Mistral-Nemo-Instruct-2407",
 - "Q4_K_M",
 - 12.248,
 - false,
 - true,
 - 40.674,
 - 64.227,
 - 17.122
 
 -  [ 
- "ibm-granite/granite-3.2-8b-instruct",
 - "Q4_K_M",
 - 8.171,
 - false,
 - true,
 - 40.089,
 - 64.747,
 - 15.431
 
 -  [ 
- "meta-llama/Llama-3.1-8B-Instruct",
 - "FP8",
 - 8.03,
 - false,
 - true,
 - 38.073,
 - 65.786,
 - 10.36
 
 -  [ 
- "mistralai/Mistral-7B-Instruct-v0.3",
 - "FP16",
 - 7.248,
 - false,
 - true,
 - 37.684,
 - 63.014,
 - 12.354
 
 -  [ 
- "meta-llama/Llama-3.2-3B-Instruct",
 - "FP8",
 - 3.213,
 - false,
 - true,
 - 32.766,
 - 59.42,
 - 6.112
 
 -  [ 
- "NousResearch/Hermes-3-Llama-3.2-3B-GGUF",
 - "Q6_K",
 - 3.213,
 - false,
 - true,
 - 31.121,
 - 53.703,
 - 8.539
 
 -  [ 
- "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
 - "FP16",
 - 1.777,
 - true,
 - true,
 - 23.843,
 - 44.435,
 - 3.251
 
 -  [ 
- "boatbomber/Gemma-3-1B-Roblox-Luau",
 - "Q5_K_M",
 - 1.302,
 - false,
 - true,
 - 21.916,
 - 39.974,
 - 3.858
 
 -  [ 
- "google/gemma-3-1b-it",
 - "Q4_K_M",
 - 1,
 - false,
 - true,
 - 21.829,
 - 39.541,
 - 4.118
 
 -  [ 
- "boatbomber/R1-Distill-Qwen-1.5B-Roblox-Luau",
 - "Q8_K",
 - 1.5,
 - true,
 - true,
 - 19.793,
 - 38.025,
 - 1.56
 
 -  [ 
- "google/gemma-3-1b-it-qat-q4_0-gguf",
 - "Q4_0",
 - 1,
 - false,
 - true,
 - 19.296,
 - 33.954,
 - 4.638
 
 
 -  [ 
 - "metadata": null