{"model":{"slug":"openai/gpt-5.4","display_name":"OpenAI: GPT-5.4","api_model_id":"gpt-5.4","family_slug":"gpt","added_at":"2026-03-05","settings_suffix":""},"aggregates":{"avg_score":60.863636,"tasks_attempted":275,"tasks_passed":133,"tasks_attempted_distinct":64,"tasks_passed_attempt_1":40,"tasks_passed_attempt_2_only":6,"pass_at_n":0.71875,"avg_cost_usd":0.000052,"latency_p50_ms":142664,"latency_p95_ms":160990,"pass_rate_ci":{"lower":0.598658376004965,"upper":0.8140677389451542},"pass_hat_at_n":0.65625,"cost_per_pass_usd":0.000073,"run_count":3,"verified_runs":0},"settings":{"temperature":null,"thinking_budget":null,"tokens_avg_per_run":122617,"consistency_pct":89.06},"history":[{"run_id":"886e8916-8c05-4a9e-8e2e-7c1cf215412d","ts":"2026-05-05T13:59:43.150Z","score":59.139785,"cost_usd":0.001154,"tier":"claimed"},{"run_id":"1b251048-5bd2-40a9-8dcd-da1d2e50e256","ts":"2026-05-05T11:14:16.474Z","score":60.054348,"cost_usd":0.001118,"tier":"claimed"},{"run_id":"7b2a8a3e-13f7-4fb7-a1ac-672b844b277a","ts":"2026-05-05T08:29:04.134Z","score":63.472222,"cost_usd":0.00108,"tier":"claimed"}],"failure_modes":[{"code":"AL0104","count":132,"pct":0.279661,"example_message":"Syntax error, '{' expected"},{"code":"AL0000","count":87,"pct":0.184322,"example_message":"App generation failed"},{"code":"AL0111","count":42,"pct":0.088983,"example_message":"Semicolon expected. Add a semicolon (;) to terminate the statement."},{"code":"AL0360","count":36,"pct":0.076271,"example_message":"Text literal was not properly terminated. Use the character ' to terminate the literal."},{"code":"AL0107","count":35,"pct":0.074153,"example_message":"Syntax error, identifier expected. Provide a valid name (letters, digits, and underscores only)."},{"code":"AL0118","count":26,"pct":0.055085,"example_message":"The name 'CreateSequentialGuid' does not exist in the current context."},{"code":"AL0198","count":26,"pct":0.055085,"example_message":"Expected one of the application object keywords (table, tableextension, page, pageextension, pagecustomization, profile, profileextension, codeunit, report, reportextension, xmlport, query, controladdin, dotnet, enum, enumextension, interface, permissionset, permissionsetextension, entitlement)"},{"code":"AL0185","count":17,"pct":0.036017,"example_message":"Interface 'CG Token Provider' is missing"},{"code":"AL0126","count":15,"pct":0.03178,"example_message":"No overload for method 'DefaultRequestHeaders' takes 1 arguments. Candidates: built-in method 'DefaultRequestHeaders()'"},{"code":"AL0132","count":13,"pct":0.027542,"example_message":"'Status' does not contain a definition for ' '"}],"recent_runs":[{"run_id":"886e8916-8c05-4a9e-8e2e-7c1cf215412d","ts":"2026-05-05T13:59:43.150Z","score":59.139785,"cost_usd":0.001154,"tier":"claimed"},{"run_id":"1b251048-5bd2-40a9-8dcd-da1d2e50e256","ts":"2026-05-05T11:14:16.474Z","score":60.054348,"cost_usd":0.001118,"tier":"claimed"},{"run_id":"7b2a8a3e-13f7-4fb7-a1ac-672b844b277a","ts":"2026-05-05T08:29:04.134Z","score":63.472222,"cost_usd":0.00108,"tier":"claimed"}]}