{"task_set_hash":"b31c942bd4e8afcfc48e238184a7d6c6db5286b138ceeeeaec206027007f7ff4","model":{"slug":"anthropic/claude-haiku-4-5-20251001","display_name":"Claude Haiku 4 5 20251001","api_model_id":"claude-haiku-4-5-20251001","family_slug":"claude","added_at":"2026-05-05T08:29:04.135Z","settings_suffix":"","max_input_tokens":200000,"max_output_tokens":64000,"capabilities":["thinking","image","pdf","structured","batch"]},"aggregates":{"avg_score":44.840525,"tasks_attempted":533,"tasks_passed":199,"tasks_attempted_distinct":110,"tasks_passed_attempt_1":46,"tasks_passed_attempt_2_only":26,"pass_at_n":0.654545,"avg_cost_usd":0.020328,"latency_p50_ms":14623,"latency_p95_ms":140216,"pass_rate_ci":{"lower":0.5618231409752017,"upper":0.7368374540972498},"pass_hat_at_n":0.5454545454545454,"cost_per_pass_usd":0.031056,"run_count":3,"verified_runs":0},"settings":{"temperature":null,"thinking_budget":null,"tokens_avg_per_run":278764,"consistency_pct":84.55},"history":[{"run_id":"beb015b1-d92e-4ce8-ba58-5d8f83fba190","ts":"2026-05-29T04:23:51.269Z","score":45.014045,"cost_usd":0.800243,"tier":"claimed","status":"completed","completed_at":"2026-05-29T08:06:53.068Z","tasks_attempted":110,"tasks_passed":67,"duration_ms":4365180},{"run_id":"01e4c1c3-08b2-4668-9ab2-74a39d944662","ts":"2026-05-29T01:01:20.143Z","score":43.926554,"cost_usd":0.71926,"tier":"claimed","status":"completed","completed_at":"2026-05-29T08:04:56.146Z","tasks_attempted":110,"tasks_passed":64,"duration_ms":4725903},{"run_id":"2fff6752-91c5-4d88-9560-c34647d29135","ts":"2026-05-28T21:35:35.221Z","score":45.575843,"cost_usd":0.716534,"tier":"claimed","status":"completed","completed_at":"2026-05-29T08:02:54.174Z","tasks_attempted":110,"tasks_passed":68,"duration_ms":4597244}],"failure_modes":[{"code":"AL0104","count":341,"pct":0.202976,"example_message":"Syntax error, 'end' expected"},{"code":"AL0000","count":269,"pct":0.160119,"example_message":"App generation failed"},{"code":"AL0132","count":208,"pct":0.12381,"example_message":"'System' does not contain a definition for 'CreateSequentialGuid'"},{"code":"AL0185","count":150,"pct":0.089286,"example_message":"Interface 'INotificationChannel' is missing"},{"code":"AL0118","count":106,"pct":0.063095,"example_message":"The name 'Chr' does not exist in the current context."},{"code":"AL0107","count":89,"pct":0.052976,"example_message":"Syntax error, identifier expected. Provide a valid name (letters, digits, and underscores only)."},{"code":"AL0111","count":80,"pct":0.047619,"example_message":"Semicolon expected. Add a semicolon (;) to terminate the statement."},{"code":"AL0198","count":55,"pct":0.032738,"example_message":"Expected one of the application object keywords (table, tableextension, page, pageextension, pagecustomization, profile, profileextension, codeunit, report, reportextension, xmlport, query, controladdin, dotnet, enum, enumextension, interface, permissionset, permissionsetextension, entitlement)"},{"code":"AL0105","count":44,"pct":0.02619,"example_message":"Syntax error, identifier expected; 'key' is a keyword"},{"code":"AL0360","count":36,"pct":0.021429,"example_message":"Text literal was not properly terminated. Use the character ' to terminate the literal."}],"recent_runs":[{"run_id":"beb015b1-d92e-4ce8-ba58-5d8f83fba190","ts":"2026-05-29T04:23:51.269Z","score":45.014045,"cost_usd":0.800243,"tier":"claimed","status":"completed","completed_at":"2026-05-29T08:06:53.068Z","tasks_attempted":110,"tasks_passed":67,"duration_ms":4365180},{"run_id":"01e4c1c3-08b2-4668-9ab2-74a39d944662","ts":"2026-05-29T01:01:20.143Z","score":43.926554,"cost_usd":0.71926,"tier":"claimed","status":"completed","completed_at":"2026-05-29T08:04:56.146Z","tasks_attempted":110,"tasks_passed":64,"duration_ms":4725903},{"run_id":"2fff6752-91c5-4d88-9560-c34647d29135","ts":"2026-05-28T21:35:35.221Z","score":45.575843,"cost_usd":0.716534,"tier":"claimed","status":"completed","completed_at":"2026-05-29T08:02:54.174Z","tasks_attempted":110,"tasks_passed":68,"duration_ms":4597244}]}