{"task_set_hash":"b31c942bd4e8afcfc48e238184a7d6c6db5286b138ceeeeaec206027007f7ff4","model":{"slug":"anthropic/claude-fable-5","display_name":"Claude Fable 5","api_model_id":"claude-fable-5","family_slug":"claude","added_at":"2026-06-09","settings_suffix":"","max_input_tokens":1000000,"max_output_tokens":128000,"capabilities":["thinking","image","pdf","structured","batch"]},"aggregates":{"avg_score":79.83871,"tasks_attempted":403,"tasks_passed":303,"tasks_attempted_distinct":110,"tasks_passed_attempt_1":90,"tasks_passed_attempt_2_only":12,"pass_at_n":0.927273,"avg_cost_usd":0.344893,"latency_p50_ms":25187,"latency_p95_ms":175889,"pass_rate_ci":{"lower":0.8630186778224797,"upper":0.962690026201357},"pass_hat_at_n":0.9090909090909091,"cost_per_pass_usd":0.371943,"run_count":3,"verified_runs":0},"settings":{"temperature":null,"thinking_budget":null,"tokens_avg_per_run":410323,"consistency_pct":90},"history":[{"run_id":"021a60c0-85d1-4ce5-ab68-16f1fa1dd475","ts":"2026-06-09T23:09:51.508Z","score":81.39313,"cost_usd":12.37219,"tier":"claimed","status":"completed","completed_at":"2026-06-09T23:27:59.767Z","tasks_attempted":110,"tasks_passed":101,"duration_ms":4723529},{"run_id":"05dc356f-e4b9-4baf-a85f-9d48cf808fa8","ts":"2026-06-09T18:22:55.973Z","score":78.740876,"cost_usd":12.41236,"tier":"claimed","status":"completed","completed_at":"2026-06-09T19:25:24.263Z","tasks_attempted":110,"tasks_passed":101,"duration_ms":6191620},{"run_id":"9ab3a769-5213-4a2a-8525-c1257524d4c3","ts":"2026-06-09T17:47:53.996Z","score":79.444444,"cost_usd":13.15365,"tier":"claimed","status":"completed","completed_at":"2026-06-09T19:25:04.830Z","tasks_attempted":110,"tasks_passed":101,"duration_ms":7384859}],"failure_modes":[{"code":"AL0104","count":114,"pct":0.368932,"example_message":"Syntax error, ')' expected"},{"code":"AL0000","count":68,"pct":0.220065,"example_message":"App generation failed"},{"code":"AL0111","count":27,"pct":0.087379,"example_message":"Semicolon expected. Add a semicolon (;) to terminate the statement."},{"code":"AL0360","count":17,"pct":0.055016,"example_message":"Text literal was not properly terminated. Use the character ' to terminate the literal."},{"code":"AL0107","count":8,"pct":0.02589,"example_message":"Syntax error, identifier expected. Provide a valid name (letters, digits, and underscores only)."},{"code":"AL0169","count":8,"pct":0.02589,"example_message":"The option value 'ReadOnly' is not valid. Check the enum definition for valid values."},{"code":"AL0761","count":8,"pct":0.02589,"example_message":"An incorrect value was used for the category. One of the values of the enum 2000000001 EventCategory is expected which is available in platform version 22.0.0.0 and higher."},{"code":"AL0118","count":6,"pct":0.019417,"example_message":"The name 'CreateSequentialGuid' does not exist in the current context."},{"code":"AL0198","count":6,"pct":0.019417,"example_message":"Expected one of the application object keywords (table, tableextension, page, pageextension, pagecustomization, profile, profileextension, codeunit, report, reportextension, xmlport, query, controladdin, dotnet, enum, enumextension, interface, permissionset, permissionsetextension, entitlement)"},{"code":"AL0132","count":5,"pct":0.016181,"example_message":"'Record Product' does not contain a definition for 'Product Code'"}],"recent_runs":[{"run_id":"021a60c0-85d1-4ce5-ab68-16f1fa1dd475","ts":"2026-06-09T23:09:51.508Z","score":81.39313,"cost_usd":12.37219,"tier":"claimed","status":"completed","completed_at":"2026-06-09T23:27:59.767Z","tasks_attempted":110,"tasks_passed":101,"duration_ms":4723529},{"run_id":"05dc356f-e4b9-4baf-a85f-9d48cf808fa8","ts":"2026-06-09T18:22:55.973Z","score":78.740876,"cost_usd":12.41236,"tier":"claimed","status":"completed","completed_at":"2026-06-09T19:25:24.263Z","tasks_attempted":110,"tasks_passed":101,"duration_ms":6191620},{"run_id":"9ab3a769-5213-4a2a-8525-c1257524d4c3","ts":"2026-06-09T17:47:53.996Z","score":79.444444,"cost_usd":13.15365,"tier":"claimed","status":"completed","completed_at":"2026-06-09T19:25:04.830Z","tasks_attempted":110,"tasks_passed":101,"duration_ms":7384859}],"predecessor":{"slug":"anthropic/claude-opus-4-7","display_name":"Claude Opus 4.7","avg_score":70.049361,"avg_cost_usd":0.2165}}