{"task_set_hash":"d881cfb43e8ccb89d3454eba476d3a70f98a2de157a58cf1a989390682044ee9","model":{"slug":"openrouter/qwen/qwen3.6-max-preview","display_name":"Qwen: Qwen3.6 Max Preview","api_model_id":"qwen/qwen3.6-max-preview","family_slug":"qwen3.6","added_at":"2026-04-27","settings_suffix":""},"aggregates":{"avg_score":56.075175,"tasks_attempted":286,"tasks_passed":136,"tasks_attempted_distinct":64,"tasks_passed_attempt_1":40,"tasks_passed_attempt_2_only":9,"pass_at_n":0.765625,"avg_cost_usd":0.174641,"latency_p50_ms":244874,"latency_p95_ms":390167,"pass_rate_ci":{"lower":0.6486650974929234,"upper":0.8525023278036498},"pass_hat_at_n":0.640625,"cost_per_pass_usd":0.228102,"run_count":3,"verified_runs":0},"settings":{"temperature":null,"thinking_budget":null,"tokens_avg_per_run":652329,"consistency_pct":67.19},"history":[{"run_id":"ab7b5438-b132-42f8-a6a4-e60ea3801047","ts":"2026-05-05T22:13:18.651Z","score":52.806122,"cost_usd":3.94404,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:50:03.714Z","tasks_attempted":64,"tasks_passed":43,"duration_ms":25090192},{"run_id":"0defa74e-bfbd-4718-8938-f93a17c269c3","ts":"2026-05-05T20:47:02.870Z","score":59.946237,"cost_usd":3.361837,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:48:42.319Z","tasks_attempted":64,"tasks_passed":47,"duration_ms":23183930},{"run_id":"8dec1145-326b-4b54-92b3-6093a70b31dd","ts":"2026-05-05T19:21:09.625Z","score":55.657895,"cost_usd":3.871141,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:48:20.791Z","tasks_attempted":64,"tasks_passed":46,"duration_ms":24753126}],"failure_modes":[{"code":"AL0104","count":125,"pct":0.197161,"example_message":"Syntax error, ')' expected"},{"code":"AL0000","count":111,"pct":0.175079,"example_message":"App generation failed"},{"code":"AL0132","count":105,"pct":0.165615,"example_message":"'Text' does not contain a definition for 'Length'"},{"code":"AL0111","count":43,"pct":0.067823,"example_message":"Semicolon expected. Add a semicolon (;) to terminate the statement."},{"code":"AL0107","count":35,"pct":0.055205,"example_message":"Syntax error, identifier expected. Provide a valid name (letters, digits, and underscores only)."},{"code":"AL0360","count":30,"pct":0.047319,"example_message":"Text literal was not properly terminated. Use the character ' to terminate the literal."},{"code":"AL0133","count":23,"pct":0.036278,"example_message":"Argument 2: cannot convert from 'Text' to 'Boolean'"},{"code":"AL0118","count":19,"pct":0.029968,"example_message":"The name 'CreateSequentialGuid' does not exist in the current context."},{"code":"AL0126","count":18,"pct":0.028391,"example_message":"No overload for method 'Field' takes 2 arguments. Candidates: built-in method 'Field(Integer)', built-in method 'Field(Text)'"},{"code":"AL0198","count":18,"pct":0.028391,"example_message":"Expected one of the application object keywords (table, tableextension, page, pageextension, pagecustomization, profile, profileextension, codeunit, report, reportextension, xmlport, query, controladdin, dotnet, enum, enumextension, interface, permissionset, permissionsetextension, entitlement)"}],"recent_runs":[{"run_id":"ab7b5438-b132-42f8-a6a4-e60ea3801047","ts":"2026-05-05T22:13:18.651Z","score":52.806122,"cost_usd":3.94404,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:50:03.714Z","tasks_attempted":64,"tasks_passed":43,"duration_ms":25090192},{"run_id":"0defa74e-bfbd-4718-8938-f93a17c269c3","ts":"2026-05-05T20:47:02.870Z","score":59.946237,"cost_usd":3.361837,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:48:42.319Z","tasks_attempted":64,"tasks_passed":47,"duration_ms":23183930},{"run_id":"8dec1145-326b-4b54-92b3-6093a70b31dd","ts":"2026-05-05T19:21:09.625Z","score":55.657895,"cost_usd":3.871141,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:48:20.791Z","tasks_attempted":64,"tasks_passed":46,"duration_ms":24753126}]}