Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions hpc/harbor_yaml/trace_docker_16concurrency_eval_ctx32k.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Harbor eval configuration with Docker backend (16 concurrency, 32k context).
# Uses local Docker/Podman runtime instead of Daytona cloud service.
# Suitable for: local development, SLURM with Podman, cloud instances with Docker.

# Job identity and the directory job output is written under.
job_name: docker-eval-job
jobs_dir: trace_jobs
# Attempt count and global timeout scaling.
# NOTE(review): presumably attempts are per task and 1.0 leaves timeouts
# unscaled — confirm against the consumer's job schema.
n_attempts: 3
timeout_multiplier: 1.0
debug: false

# Orchestrator: local execution with 16 concurrent trials.
# NOTE(review): nesting below is reconstructed from key grouping — confirm
# against the Harbor job schema.
orchestrator:
  type: local
  n_concurrent_trials: 16
  quiet: false
  plain_output: true
  # Adaptive concurrency is switched off; limits, endpoint and threshold
  # are left unset (explicit null).
  adaptive_concurrency:
    enabled: false
    algorithm: gradient2
    min_limit: null
    max_limit: null
    metrics_endpoint: null
    metrics_timeout_sec: 10.0
    poll_interval_sec: 120.0
    window_size: 5
    queue_p95_drop_threshold: null
    algorithm_kwargs: {}
  # Retry policy: up to 10 retries, waits bounded to 1.0-90.0 seconds.
  # NOTE(review): presumably the excluded exceptions are never retried and
  # wait_multiplier is a backoff factor — confirm.
  retry:
    max_retries: 10
    include_exceptions: null
    exclude_exceptions:
      - AgentTimeoutError
      - VerifierTimeoutError
      - SandboxBuildFailedError
    wait_multiplier: 2.0
    min_wait_sec: 1.0
    max_wait_sec: 90.0
  kwargs: {}

# Environment: Docker backend with per-trial resource overrides
# (1 CPU, 2048 MB memory, 2048 MB storage).
# NOTE(review): presumably force_build rebuilds the image on each run and
# delete removes the environment afterwards — confirm.
environment:
  type: docker
  force_build: true
  delete: true
  override_cpus: 1
  override_memory_mb: 2048
  override_storage_mb: 2048
  kwargs: {}

# Verifier: enabled, with no timeout override or cap set (explicit null).
verifier:
  override_timeout_sec: null
  max_timeout_sec: null
  disable: false

# No metrics are configured (empty list).
metrics: []

# Agents: a single terminus-2 agent. model_name and the metrics endpoint
# are placeholders that must be replaced or overridden before a real run.
# NOTE(review): nesting below is reconstructed from key grouping — confirm
# against the Harbor agent schema and the terminus-2 keyword arguments.
agents:
  - name: terminus-2
    import_path: null
    model_name: placeholder/override-at-runtime
    max_timeout_sec: null
    kwargs:
      record_terminal_session: false
      collect_rollout_details: false
      collect_engine_metrics: false
      metrics_endpoint: https://replace-with-vllm-host/metrics
      metrics_timeout_sec: 10
      # 32k-token input window, 8192-token output cap, zero per-token cost.
      model_info:
        max_input_tokens: 32768
        max_output_tokens: 8192
        input_cost_per_token: 0
        output_cost_per_token: 0

      trajectory_config:
        raw_content: true
        linear_history: true
      enable_summarize: true
      proactive_summarization_threshold: 8192
      interleaved_thinking: true
      parser_name: json
      tmux_pane_width: 160
      tmux_pane_height: 40
      # NOTE(review): presumably forwarded in the model request body to
      # turn on the chat template's thinking mode — confirm.
      extra_body:
        chat_template_kwargs:
          enable_thinking: true

# Dataset sources. The path is a placeholder and must be replaced with the
# real tasks directory.
datasets:
  - path: /replace/with/tasks/path

# No individual tasks are listed (empty list).
tasks: []