Roblox LLM Leaderboard
Tracking LLM capabilities in Roblox game development.
Benchmarks:
- RobloxQA: Multiple-choice question answering about Roblox APIs and concepts.
- RobloxQA_OpenEnded: Open-ended question answering about Roblox APIs and concepts, with no answer choices provided. Response correctness is judged by an ensemble of reasoning LLMs that compare the generated answer to the correct answer.
{
- "headers": [
- "Model",
- "Precision",
- "Params (B)",
- "Reasoning",
- "Open Source",
- "Average",
- "RobloxQA",
- "RobloxQA_OpenEnded"
- "data": [
- [
- "OpenAI/gpt-o4-mini (high)",
- null,
- null,
- true,
- false,
- 63.705,
- 81.854,
- 45.557
- [
- "OpenAI/gpt-o4-mini (low)",
- null,
- null,
- true,
- false,
- 62.925,
- 81.377,
- 44.473
- [
- "Google/Gemini-2.5-Pro-Preview-03-25",
- null,
- null,
- true,
- false,
- 61.993,
- 82.547,
- 41.439
- [
- "deepseek-ai/DeepSeek-R1",
- "FP8",
- 684.531,
- true,
- true,
- 61.841,
- 81.68,
- 42.003
- [
- "OpenAI/gpt-4.1",
- null,
- null,
- false,
- false,
- 60.888,
- 80.987,
- 40.789
- [
- "OpenAI/gpt-o3-mini (high)",
- null,
- null,
- true,
- false,
- 60.845,
- 81.291,
- 40.399
- [
- "Anthropic/Claude-3.7-Sonnet (Thinking)",
- null,
- null,
- true,
- false,
- 60.303,
- 80.858,
- 39.749
- [
- "Anthropic/Claude-3.7-Sonnet",
- null,
- null,
- false,
- false,
- 58.938,
- 80.121,
- 37.755
- [
- "OpenAI/gpt-o3-mini (low)",
- null,
- null,
- true,
- false,
- 58.093,
- 79.688,
- 36.498
- [
- "Google/Gemini-2.5-Flash-Preview-04-17",
- null,
- null,
- true,
- false,
- 57.876,
- 80.641,
- 35.111
- [
- "Qwen/Qwen3-235B-A22B",
- "FP8",
- 235.094,
- true,
- true,
- 56.793,
- 78.302,
- 35.284
- [
- "OpenAI/gpt-4.1-mini",
- null,
- null,
- false,
- false,
- 56.576,
- 79.212,
- 33.94
- [
- "OpenAI/gpt-4o",
- null,
- null,
- false,
- false,
- 56.338,
- 77.523,
- 35.154
- [
- "Anthropic/Claude-3.5-Sonnet",
- null,
- null,
- false,
- false,
- 56.274,
- 76.96,
- 35.587
- [
- "deepseek-ai/DeepSeek-V3",
- "FP8",
- 684.531,
- false,
- true,
- 55.601,
- 77.566,
- 33.637
- [
- "Qwen/QwQ-32B",
- "FP16",
- 32.764,
- true,
- true,
- 54.323,
- 77.869,
- 30.776
- [
- "Google/Gemini-2.0-Flash",
- null,
- null,
- false,
- false,
- 53.976,
- 76.7,
- 31.253
- [
- "boatbomber/Gemma-3-27B-Roblox-Luau",
- "Q4_K_M",
- 27.4,
- false,
- true,
- 52.33,
- 74.838,
- 29.822
- [
- "OpenAI/gpt-4.1-nano",
- null,
- null,
- false,
- false,
- 51.723,
- 75.227,
- 28.218
- [
- "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
- "FP16",
- 70.554,
- true,
- true,
- 50.747,
- 75.574,
- 25.921
- [
- "Google/Gemini-2.0-Flash-Lite",
- null,
- null,
- false,
- false,
- 50.704,
- 74.924,
- 26.485
- [
- "OpenAI/gpt-4o-mini",
- null,
- null,
- false,
- false,
- 50.228,
- 74.015,
- 26.441
- [
- "Qwen/Qwen2.5-72B-Instruct",
- "FP8",
- 72.706,
- false,
- true,
- 49.707,
- 75.314,
- 24.101
- [
- "meta-llama/Llama-3.3-70B-Instruct",
- "FP8",
- 70.554,
- false,
- true,
- 49.621,
- 75.011,
- 24.231
- [
- "microsoft/phi-4",
- "Q6_K",
- 14.66,
- false,
- true,
- 49.317,
- 74.924,
- 23.71
- [
- "google/gemma-3-27b-it",
- "Q4_K_M",
- 27.432,
- false,
- true,
- 49.123,
- 72.239,
- 26.008
- [
- "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
- "Q6_K",
- 24.011,
- false,
- true,
- 48.646,
- 73.668,
- 23.624
- [
- "Qwen/Qwen2.5-Coder-32B-Instruct",
- "FP16",
- 32.764,
- false,
- true,
- 48.473,
- 73.105,
- 23.84
- [
- "mistralai/Mistral-Small-24B-Instruct-2501",
- "FP16",
- 23.572,
- false,
- true,
- 48.126,
- 72.889,
- 23.364
- [
- "boatbomber/R1-Distill-Qwen-14B-Roblox-Luau",
- "Q6_K",
- 14.77,
- true,
- true,
- 46.847,
- 74.968,
- 18.726
- [
- "google/gemma-3-12b-it",
- "Q4_K_M",
- 12.187,
- false,
- true,
- 46.112,
- 70.03,
- 22.193
- [
- "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
- "FP8",
- 14.77,
- true,
- true,
- 45.894,
- 72.456,
- 19.332
- [
- "Qwen/Qwen2.5-7B-Instruct",
- "FP8",
- 7.616,
- false,
- true,
- 42.602,
- 67.692,
- 17.512
- [
- "internlm/internlm2_5-20b-chat",
- "Q4_K_M",
- 19.861,
- false,
- true,
- 40.869,
- 64.184,
- 17.555
- [
- "mistralai/Mistral-Nemo-Instruct-2407",
- "Q4_K_M",
- 12.248,
- false,
- true,
- 40.674,
- 64.227,
- 17.122
- [
- "ibm-granite/granite-3.2-8b-instruct",
- "Q4_K_M",
- 8.171,
- false,
- true,
- 40.089,
- 64.747,
- 15.431
- [
- "meta-llama/Llama-3.1-8B-Instruct",
- "FP8",
- 8.03,
- false,
- true,
- 38.073,
- 65.786,
- 10.36
- [
- "mistralai/Mistral-7B-Instruct-v0.3",
- "FP16",
- 7.248,
- false,
- true,
- 37.684,
- 63.014,
- 12.354
- [
- "meta-llama/Llama-3.2-3B-Instruct",
- "FP8",
- 3.213,
- false,
- true,
- 32.766,
- 59.42,
- 6.112
- [
- "NousResearch/Hermes-3-Llama-3.2-3B-GGUF",
- "Q6_K",
- 3.213,
- false,
- true,
- 31.121,
- 53.703,
- 8.539
- [
- "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
- "FP16",
- 1.777,
- true,
- true,
- 23.843,
- 44.435,
- 3.251
- [
- "boatbomber/Gemma-3-1B-Roblox-Luau",
- "Q5_K_M",
- 1.302,
- false,
- true,
- 21.916,
- 39.974,
- 3.858
- [
- "google/gemma-3-1b-it",
- "Q4_K_M",
- 1,
- false,
- true,
- 21.829,
- 39.541,
- 4.118
- [
- "boatbomber/R1-Distill-Qwen-1.5B-Roblox-Luau",
- "Q8_K",
- 1.5,
- true,
- true,
- 19.793,
- 38.025,
- 1.56
- [
- "google/gemma-3-1b-it-qat-q4_0-gguf",
- "Q4_0",
- 1,
- false,
- true,
- 19.296,
- 33.954,
- 4.638
- [
- "metadata": null