{
    "id": "f13988dd-81f6-4aa3-83e1-aa1e621d288e",
    "started_at": "2026-06-29T16:42:00.605355",
    "updated_at": "2026-06-29T18:37:10.322785",
    "finished_at": "2026-06-29T18:37:10.322785",
    "n_total_trials": 89,
    "stats": {
        "n_completed_trials": 89,
        "n_errored_trials": 21,
        "n_running_trials": 0,
        "n_pending_trials": 0,
        "n_cancelled_trials": 0,
        "n_retries": 1,
        "evals": {
            "opencode__minimax/minimax-m3__terminal-bench/terminal-bench-2": {
                "n_trials": 88,
                "n_errors": 21,
                "metrics": [
                    {
                        "mean": 0.43820224719101125
                    }
                ],
                "pass_at_k": {},
                "reward_stats": {
                    "reward": {
                        "0.0": [
                            "make-mips-interpreter__aaKgnTK",
                            "circuit-fibsqrt__r6vixhC",
                            "overfull-hbox__Sjpv3ad",
                            "video-processing__UcYWLVm",
                            "distribution-search__F3b9TZb",
                            "protein-assembly__rDc74kF",
                            "path-tracing__7Qm2rLR",
                            "compile-compcert__reRroKL",
                            "dna-assembly__nDHMAvf",
                            "install-windows-3-11__J6q9KxU",
                            "openssl-selfsigned-cert__VXZhqqJ",
                            "feal-linear-cryptanalysis__zLm6398",
                            "polyglot-rust-c__i5MFo2C",
                            "cancel-async-tasks__vDZdkvv",
                            "dna-insert__ueSrqZe",
                            "db-wal-recovery__nW2xbU7",
                            "extract-elf__8KL2jBt",
                            "winning-avg-corewars__K2mbe3a",
                            "mteb-leaderboard__FtgvG6A",
                            "regex-chess__V5BcZdH",
                            "mailman__gBYUPVf",
                            "write-compressor__5ZyGNgH",
                            "chess-best-move__48RcsZG",
                            "extract-moves-from-video__DfpWfPo",
                            "gcode-to-text__djRdkoc",
                            "merge-diff-arc-agi-task__MFUtmnX",
                            "kv-store-grpc__bMjwTXs",
                            "nginx-request-logging__bvTAe6R",
                            "sqlite-with-gcov__6bEEk6y",
                            "sanitize-git-repo__Q3Fqrgi",
                            "train-fasttext__NYXYPD8",
                            "torch-tensor-parallelism__UmqckPx",
                            "schemelike-metacircular-eval__3XYxxXB",
                            "torch-pipeline-parallelism__7a39tvS",
                            "raman-fitting__YARYZYZ",
                            "filter-js-from-html__Sbxs4Di",
                            "largest-eigenval__oFCwrvG",
                            "polyglot-c-py__m8xab3g",
                            "bn-fit-modify__QkXxjWQ",
                            "password-recovery__avqUk3A",
                            "feal-differential-cryptanalysis__hVaEjDA",
                            "sam-cell-seg__uiywTkn",
                            "make-doom-for-mips__3acEcfW",
                            "adaptive-rejection-sampler__M7gdiTP",
                            "gpt2-codegolf__yya3Zva",
                            "mteb-retrieve__AjhmEDP",
                            "path-tracing-reverse__MhBV3t4",
                            "qemu-startup__4bSbaDv",
                            "configure-git-webserver__vm4V4N3"
                        ],
                        "1.0": [
                            "build-pov-ray__spA23Jj",
                            "break-filter-js-from-html__QX5RuaP",
                            "caffe-cifar-10__X5Pujtr",
                            "log-summary-date-ranges__df4yzXC",
                            "vulnerable-secret__b6u8iNU",
                            "crack-7z-hash__okYasPj",
                            "rstan-to-pystan__ZKLyxT6",
                            "git-leak-recovery__xMReMCp",
                            "headless-terminal__mpbYxAP",
                            "constraints-scheduling__bQ7xYcd",
                            "custom-memory-heap-crash__iSYMdu9",
                            "build-cython-ext__3RNwftA",
                            "fix-git__MSuK5oe",
                            "prove-plus-comm__9oe5vUp",
                            "pytorch-model-recovery__cKTUYde",
                            "multi-source-data-merger__DSCxGqT",
                            "git-multibranch__ZCr5Kh5",
                            "qemu-alpine-ssh__yqX8hbP",
                            "regex-log__nXW3Jj6",
                            "tune-mjcf__CYHdUse",
                            "build-pmars__pqZrz7v",
                            "reshard-c4-data__Lg6m2iN",
                            "count-dataset-tokens__uWw3gLG",
                            "cobol-modernization__2JumDf5",
                            "pypi-server__Fd975zL",
                            "model-extraction-relu-logits__Gj84xeL",
                            "hf-model-inference__jXZGvzU",
                            "sparql-university__Uhv9REs",
                            "fix-ocaml-gc__vS9EzC4",
                            "pytorch-model-cli__zcJaUVA",
                            "sqlite-db-truncate__armCPzG",
                            "fix-code-vulnerability__irbz4WC",
                            "modernize-scientific-stack__4R27NYk",
                            "mcmc-sampling-stan__Md6pirU",
                            "financial-document-processor__k6kShFb",
                            "large-scale-text-editing__xZbXyiR",
                            "code-from-image__WeK8id8",
                            "portfolio-optimization__NupjZqF",
                            "llm-inference-batching-scheduler__BCnnBZ6"
                        ]
                    }
                },
                "exception_stats": {
                    "AgentTimeoutError": [
                        "make-mips-interpreter__aaKgnTK",
                        "overfull-hbox__Sjpv3ad",
                        "protein-assembly__rDc74kF",
                        "path-tracing__7Qm2rLR",
                        "caffe-cifar-10__X5Pujtr",
                        "db-wal-recovery__nW2xbU7",
                        "winning-avg-corewars__K2mbe3a",
                        "mailman__gBYUPVf",
                        "extract-moves-from-video__DfpWfPo",
                        "gcode-to-text__djRdkoc",
                        "qemu-alpine-ssh__yqX8hbP",
                        "train-fasttext__NYXYPD8",
                        "schemelike-metacircular-eval__3XYxxXB",
                        "largest-eigenval__oFCwrvG",
                        "fix-ocaml-gc__vS9EzC4",
                        "password-recovery__avqUk3A",
                        "make-doom-for-mips__3acEcfW",
                        "mcmc-sampling-stan__Md6pirU",
                        "path-tracing-reverse__MhBV3t4",
                        "llm-inference-batching-scheduler__BCnnBZ6"
                    ],
                    "VerifierTimeoutError": [
                        "query-optimize__UtYFsXj"
                    ]
                }
            }
        },
        "n_input_tokens": 229437954,
        "n_cache_tokens": 225546818,
        "n_output_tokens": 914694,
        "cost_usd": null
    }
}