{"task_set_hash":"d881cfb43e8ccb89d3454eba476d3a70f98a2de157a58cf1a989390682044ee9","model":{"slug":"openrouter/qwen/qwen3.5-plus-20260420","display_name":"Qwen: Qwen3.5 Plus 2026-04-20","api_model_id":"qwen/qwen3.5-plus-20260420","family_slug":"qwen3.5","added_at":"2026-04-27","settings_suffix":""},"aggregates":{"avg_score":38.379205,"tasks_attempted":327,"tasks_passed":98,"tasks_attempted_distinct":64,"tasks_passed_attempt_1":27,"tasks_passed_attempt_2_only":14,"pass_at_n":0.640625,"avg_cost_usd":0.073035,"latency_p50_ms":162965,"latency_p95_ms":278742,"pass_rate_ci":{"lower":0.5182062363615084,"upper":0.7471176946778538},"pass_hat_at_n":0.375,"cost_per_pass_usd":0.114005,"run_count":3,"verified_runs":0},"settings":{"temperature":null,"thinking_budget":null,"tokens_avg_per_run":718573,"consistency_pct":59.38},"history":[{"run_id":"9a9a4d0e-ca69-4c5d-9755-d9bf674abcfe","ts":"2026-05-05T22:13:18.650Z","score":37.837838,"cost_usd":1.650182,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:50:15.363Z","tasks_attempted":64,"tasks_passed":32,"duration_ms":19100757},{"run_id":"bc365892-44dd-4cd2-a55c-aa9a304403f8","ts":"2026-05-05T20:47:02.870Z","score":39.602804,"cost_usd":1.412092,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:49:53.475Z","tasks_attempted":64,"tasks_passed":33,"duration_ms":17024671},{"run_id":"2e16b76a-74d2-4f6e-9edf-776aa2fc9e8c","ts":"2026-05-05T19:21:09.625Z","score":37.729358,"cost_usd":1.611948,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:48:33.105Z","tasks_attempted":64,"tasks_passed":33,"duration_ms":18833482}],"failure_modes":[{"code":"AL0104","count":307,"pct":0.240784,"example_message":"Syntax error, ')' expected"},{"code":"AL0000","count":185,"pct":0.145098,"example_message":"App generation failed"},{"code":"AL0132","count":159,"pct":0.124706,"example_message":"'Record Item' does not contain a definition for 'Warranty Period'"},{"code":"AL0111","count":138,"pct":0.108235,"example_message":"Semicolon expected. Add a semicolon (;) to terminate the statement."},{"code":"AL0118","count":94,"pct":0.073725,"example_message":"The name 'CreateSequentialGuid' does not exist in the current context."},{"code":"AL0198","count":66,"pct":0.051765,"example_message":"Expected one of the application object keywords (table, tableextension, page, pageextension, pagecustomization, profile, profileextension, codeunit, report, reportextension, xmlport, query, controladdin, dotnet, enum, enumextension, interface, permissionset, permissionsetextension, entitlement)"},{"code":"AL0107","count":57,"pct":0.044706,"example_message":"Syntax error, identifier expected. Provide a valid name (letters, digits, and underscores only)."},{"code":"AL0133","count":38,"pct":0.029804,"example_message":"Argument 2: cannot convert from 'Text' to 'Boolean'"},{"code":"AL0360","count":36,"pct":0.028235,"example_message":"Text literal was not properly terminated. Use the character ' to terminate the literal."},{"code":"AL0126","count":35,"pct":0.027451,"example_message":"No overload for method 'LogMessage' takes 5 arguments. Candidates: built-in method 'LogMessage(Text, Text, Verbosity, DataClassification, TelemetryScope, Text, Text, [Text], [Text])', built-in method 'LogMessage(Text, Text, Verbosity, DataClassification, TelemetryScope, Dictionary of [Text, Text])'"}],"recent_runs":[{"run_id":"9a9a4d0e-ca69-4c5d-9755-d9bf674abcfe","ts":"2026-05-05T22:13:18.650Z","score":37.837838,"cost_usd":1.650182,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:50:15.363Z","tasks_attempted":64,"tasks_passed":32,"duration_ms":19100757},{"run_id":"bc365892-44dd-4cd2-a55c-aa9a304403f8","ts":"2026-05-05T20:47:02.870Z","score":39.602804,"cost_usd":1.412092,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:49:53.475Z","tasks_attempted":64,"tasks_passed":33,"duration_ms":17024671},{"run_id":"2e16b76a-74d2-4f6e-9edf-776aa2fc9e8c","ts":"2026-05-05T19:21:09.625Z","score":37.729358,"cost_usd":1.611948,"tier":"claimed","status":"completed","completed_at":"2026-05-05T23:48:33.105Z","tasks_attempted":64,"tasks_passed":33,"duration_ms":18833482}]}