-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_and_eval_combined.sh
More file actions
119 lines (97 loc) · 3.75 KB
/
run_and_eval_combined.sh
File metadata and controls
119 lines (97 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/bin/bash
#
# Run the generation phase followed by the evaluation phase for a single
# experiment configuration.
#
# Usage: fill in the configuration section below (MODEL_PATH is required),
# then run this script from the directory that contains
# generation_update.py, eval.py and dataset/ (all are referenced by
# relative path).
#
# Phase timestamps are written to
# ${EXPERIMENT_LOGS_DIR}/combined_run_status.log.

# Abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# ========== Experiment configuration ==========
SEED=42
THRESHOLD='mean'
DIVIDE_VERSION='v4'

# Specify the GPU to use
export CUDA_VISIBLE_DEVICES="0"

# Fixed parameters
MAX_INTERRUPTS=3
parameter=1.5
port=20008
MODEL_PATH=""      # Set the model path
# NOTE(review): MODEL_Baseline is not referenced anywhere else in this script.
MODEL_Baseline=""  # Set the baseline model name
PRM_MODEL_PATH=""  # Set the PRM model path
data_type=AIME24

# Without this check an empty MODEL_PATH only surfaces later as a confusing
# argument error inside generation_update.py.
if [[ -z "$MODEL_PATH" ]]; then
  echo "error: MODEL_PATH is empty; set it in the configuration section of $0" >&2
  exit 1
fi

# ========== Path Configuration ==========
RESULTS_DIR="results_test/${parameter}B/${data_type}/divide_${DIVIDE_VERSION}"
LOGS_DIR="${RESULTS_DIR}/logs"
EXPERIMENT_LOGS_DIR="experiment_logs_${parameter}B_${data_type}"

# Name shared by the per-run output directory and the per-run log file.
RUN_NAME="max_interrupts_${MAX_INTERRUPTS}_threshold_${THRESHOLD}_${SEED}"
RUN_DIR="${RESULTS_DIR}/${RUN_NAME}"

# ========== Threshold Parameter Conversion ==========
# Map the human-readable THRESHOLD to the version tag passed via --threshold.
# Values without a mapping are passed through unchanged.
case "$THRESHOLD" in
  0.69) THRESHOLD_PARAM="v0" ;;
  0.7)  THRESHOLD_PARAM="v1" ;;
  0.71) THRESHOLD_PARAM="v2" ;;
  mean) THRESHOLD_PARAM="v3" ;;
  ema)  THRESHOLD_PARAM="v4" ;;
  *)    THRESHOLD_PARAM="$THRESHOLD" ;;
esac

# ========== Step 1: Create necessary directories ==========
mkdir -p -- "$LOGS_DIR" "$EXPERIMENT_LOGS_DIR"

# ========== Step 2: Log the run status ==========
STATUS_FILE="${EXPERIMENT_LOGS_DIR}/combined_run_status.log"
# The single '>' truncates any status log left over from a previous run.
{
  echo "Experiment start time: $(date)"
  echo "===================================="
  echo "Starting the combined experiment"
  echo "Parameters: SEED=$SEED, THRESHOLD=$THRESHOLD, DIVIDE_VERSION=$DIVIDE_VERSION"
  echo "Start time: $(date)"
} > "$STATUS_FILE"

# ========== Step 3: Run the generation phase ==========
echo "=============================================="
echo "Step 1: Starting the generation phase..."
echo "=============================================="

# NOTE(review): log_file is computed but nothing is written to it; the
# generation output only goes to the terminal. Confirm whether the output
# was meant to be captured under LOGS_DIR.
log_file="${LOGS_DIR}/run_${RUN_NAME}.out"

# The arguments live in an array rather than a backslash-continued command:
# a trailing comment after a backslash silently ends the command, which
# previously dropped --divide_step_method and --threshold from the call.
generation_args=(
  --model_path "$MODEL_PATH"
  --port "$port"
  --save_root_dir "$RUN_DIR"
  --data_path "dataset/${data_type}/test.jsonl"
  --max_interrupts "$MAX_INTERRUPTS"
  --deepen_prompt_version "v3"
  --interrupt_signals_version "1-2-3-4-5-6-7-8-9-10-11"
  --system_prompt_version "v0"
  --question_prefix_version "v0"
  --question_suffix_version "v2"
  --gt_answer_key 'answer'
  --temperature 0.6
  --top_p 0.95
  --max_tokens 32768
  --question_id "id"
  --question_key 'problem'
  --extend_max_tokens 1000
  --seed "$SEED"
  --prm_model_path "$PRM_MODEL_PATH"
  --divide_step_method "$DIVIDE_VERSION"
  --threshold "$THRESHOLD_PARAM"
)

echo "Running generation command: python -u generation_update.py ..." >> "$STATUS_FILE"
generation_status=0
python -u generation_update.py "${generation_args[@]}" || generation_status=$?
if (( generation_status != 0 )); then
  # Do not evaluate after a failed generation run.
  echo "Generation phase FAILED (exit code ${generation_status}): $(date)" >> "$STATUS_FILE"
  echo "error: generation phase failed with exit code ${generation_status}" >&2
  exit "$generation_status"
fi
echo "Generation phase completion time: $(date)" >> "$STATUS_FILE"

# ========== Step 4: Wait for the system to stabilize ==========
echo ""
echo "=============================================="
echo "Step 2: Waiting 10 seconds for the system to stabilize..."
echo "=============================================="
for (( remaining = 10; remaining >= 1; remaining-- )); do
  # Fixed-width number so that "10" -> " 9" leaves no stray character behind
  # when the line is overwritten in place via '\r'.
  printf 'Countdown: %2d seconds\r' "$remaining"
  sleep 1
done
echo ""

# ========== Step 5: Run the evaluation phase ==========
echo "=============================================="
echo "Step 3: Starting the evaluation phase..."
echo "=============================================="
RESULTS_JSONL_DIR="${RUN_DIR}/jsonl"

evaluation_args=(
  --root_dir "$RESULTS_JSONL_DIR"
  --gt_answer_key 'answer'
  --eval_mode 'symeval'
  --id_key 'id'
)

echo "Running evaluation command: python eval.py ..." >> "$STATUS_FILE"
evaluation_status=0
python eval.py "${evaluation_args[@]}" || evaluation_status=$?
if (( evaluation_status != 0 )); then
  echo "Evaluation phase FAILED (exit code ${evaluation_status}): $(date)" >> "$STATUS_FILE"
  echo "error: evaluation phase failed with exit code ${evaluation_status}" >&2
  exit "$evaluation_status"
fi
echo "Evaluation phase completion time: $(date)" >> "$STATUS_FILE"

# ========== Done ==========
echo ""
echo "=============================================="
echo "All tasks completed!"
echo "End time: $(date)"
echo "For detailed logs, please check: $STATUS_FILE"
echo "=============================================="