{"model":{"slug":"openai/gpt-5.5","display_name":"GPT-5.5","api_model_id":"gpt-5.5","family_slug":"gpt","added_at":"2026-04-23","settings_suffix":""},"aggregates":{"avg_score":57.037037,"tasks_attempted":540,"tasks_passed":273,"tasks_attempted_distinct":64,"tasks_passed_attempt_1":44,"tasks_passed_attempt_2_only":7,"pass_at_n":0.796875,"avg_cost_usd":0.710189,"latency_p50_ms":159009,"latency_p95_ms":226484,"pass_rate_ci":{"lower":0.6828613244878341,"upper":0.8772669743730419},"pass_hat_at_n":0.640625,"cost_per_pass_usd":0.891218,"run_count":6,"verified_runs":0},"settings":{"temperature":null,"thinking_budget":null,"tokens_avg_per_run":301476,"consistency_pct":65.63},"history":[{"run_id":"71c98551-f4a2-4c8b-9cfe-d0b168ab51e3","ts":"2026-05-04T23:37:17.408Z","score":59.831461,"cost_usd":9.24869,"tier":"claimed"},{"run_id":"ad822bdf-7e31-4df0-b98c-72743083f499","ts":"2026-05-04T20:18:33.859Z","score":57.417582,"cost_usd":9.7818,"tier":"claimed"},{"run_id":"7cc07e45-4fdd-4089-bf1e-563f36556abb","ts":"2026-05-04T16:43:38.341Z","score":62.087912,"cost_usd":9.03436,"tier":"claimed"},{"run_id":"531dd810-cee5-416c-8c08-7ca13ce6611b","ts":"2026-04-26T06:40:17.386Z","score":52.916667,"cost_usd":5.663265,"tier":"claimed"},{"run_id":"bca784ab-ec7a-4420-b918-806b2290e253","ts":"2026-04-26T05:44:59.015Z","score":52.173913,"cost_usd":6.144085,"tier":"claimed"},{"run_id":"765b434a-ca1a-44cb-aa0f-9a72a98e5319","ts":"2026-04-26T04:38:01.707Z","score":57.902299,"cost_usd":5.579915,"tier":"claimed"}],"failure_modes":[{"code":"AL0104","count":227,"pct":0.324286,"example_message":"Syntax error, '{' expected"},{"code":"AL0000","count":118,"pct":0.168571,"example_message":"App generation failed"},{"code":"AL0111","count":66,"pct":0.094286,"example_message":"Semicolon expected. Add a semicolon (;) to terminate the statement."},{"code":"AL0185","count":61,"pct":0.087143,"example_message":"Interface 'Payment Processor' is missing"},{"code":"AL0132","count":50,"pct":0.071429,"example_message":"'System' does not contain a definition for 'CreateSequentialGuid'"},{"code":"AL0360","count":50,"pct":0.071429,"example_message":"Text literal was not properly terminated. Use the character ' to terminate the literal."},{"code":"AL0107","count":36,"pct":0.051429,"example_message":"Syntax error, identifier expected. Provide a valid name (letters, digits, and underscores only)."},{"code":"AL0198","count":22,"pct":0.031429,"example_message":"Expected one of the application object keywords (table, tableextension, page, pageextension, pagecustomization, profile, profileextension, codeunit, report, reportextension, xmlport, query, controladdin, dotnet, enum, enumextension, interface, permissionset, permissionsetextension, entitlement)"},{"code":"AL0105","count":15,"pct":0.021429,"example_message":"Syntax error, identifier expected; 'key' is a keyword"},{"code":"AL0133","count":10,"pct":0.014286,"example_message":"Argument 2: cannot convert from 'Text' to 'Boolean'"}],"recent_runs":[{"run_id":"71c98551-f4a2-4c8b-9cfe-d0b168ab51e3","ts":"2026-05-04T23:37:17.408Z","score":59.831461,"cost_usd":9.24869,"tier":"claimed"},{"run_id":"ad822bdf-7e31-4df0-b98c-72743083f499","ts":"2026-05-04T20:18:33.859Z","score":57.417582,"cost_usd":9.7818,"tier":"claimed"},{"run_id":"7cc07e45-4fdd-4089-bf1e-563f36556abb","ts":"2026-05-04T16:43:38.341Z","score":62.087912,"cost_usd":9.03436,"tier":"claimed"},{"run_id":"531dd810-cee5-416c-8c08-7ca13ce6611b","ts":"2026-04-26T06:40:17.386Z","score":52.916667,"cost_usd":5.663265,"tier":"claimed"},{"run_id":"bca784ab-ec7a-4420-b918-806b2290e253","ts":"2026-04-26T05:44:59.015Z","score":52.173913,"cost_usd":6.144085,"tier":"claimed"},{"run_id":"765b434a-ca1a-44cb-aa0f-9a72a98e5319","ts":"2026-04-26T04:38:01.707Z","score":57.902299,"cost_usd":5.579915,"tier":"claimed"}]}