Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,44 @@ agentlightning/dashboard/**/*.svg

# Docker data
docker/data/
examples/spider/log/spider_20251214_211340/config.json
examples/spider/log/spider_20251214_211340/cpu_monitor.csv
examples/spider/log/spider_20251214_211340/gpu_monitor.csv
examples/spider/log/spider_20251214_211340/hardware.txt
examples/spider/log/spider_20251214_211340/objectives.txt
examples/spider/log/spider_20251214_211340/progress.txt
examples/spider/log/spider_20251214_211340/sequence_lengths.csv
examples/spider/log/spider_20251214_212714/config.json
examples/spider/log/spider_20251214_212714/cpu_monitor.csv
examples/spider/log/spider_20251214_212714/error.txt
examples/spider/log/spider_20251214_212714/gpu_monitor.csv
examples/spider/log/spider_20251214_212714/hardware.txt
examples/spider/log/spider_20251214_212714/objectives.txt
examples/spider/log/spider_20251214_213237/config.json
examples/spider/log/spider_20251214_213237/cpu_monitor.csv
examples/spider/log/spider_20251214_213237/gpu_monitor.csv
examples/spider/log/spider_20251214_213237/hardware.txt
examples/spider/log/spider_20251214_213237/objectives.txt
examples/spider/log/spider_20251214_213237/progress.txt
examples/spider/log/spider_20251214_213237/sequence_lengths.csv
examples/spider/log/spider_local05_20251214185441_20251214_185441/config.json
examples/spider/log/spider_local05_20251214185441_20251214_185441/cpu_monitor.csv
examples/spider/log/spider_local05_20251214185441_20251214_185441/gpu_monitor.csv
examples/spider/log/spider_local05_20251214185441_20251214_185441/hardware.txt
examples/spider/log/spider_local05_20251214185441_20251214_185441/objectives.txt
examples/spider/log/spider_local05_20251214185441_20251214_185441/progress.txt
examples/spider/log/spider_local05_20251214185441_20251214_185441/sequence_lengths.csv
examples/spider/log/spider_local05_20251214191542_20251214_191542/config.json
examples/spider/log/spider_local05_20251214191542_20251214_191542/cpu_monitor.csv
examples/spider/log/spider_local05_20251214191542_20251214_191542/gpu_monitor.csv
examples/spider/log/spider_local05_20251214191542_20251214_191542/hardware.txt
examples/spider/log/spider_local05_20251214191542_20251214_191542/objectives.txt
examples/spider/log/spider_local05_20251214191542_20251214_191542/progress.txt
examples/spider/log/spider_local05_20251214191542_20251214_191542/sequence_lengths.csv
examples/spider/log/spider_local05_20251214201622_20251214_201622/config.json
examples/spider/log/spider_local05_20251214201622_20251214_201622/cpu_monitor.csv
examples/spider/log/spider_local05_20251214201622_20251214_201622/gpu_monitor.csv
examples/spider/log/spider_local05_20251214201622_20251214_201622/hardware.txt
examples/spider/log/spider_local05_20251214201622_20251214_201622/objectives.txt
examples/spider/log/spider_local05_20251214201622_20251214_201622/progress.txt
examples/spider/log/spider_local05_20251214201622_20251214_201622/sequence_lengths.csv
117 changes: 117 additions & 0 deletions agentlightning/verl/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
from __future__ import annotations

import random
import time
from contextlib import contextmanager
from copy import deepcopy
from datetime import datetime
from pprint import pprint
from typing import Dict, Tuple, Type
from pathlib import Path

import numpy as np
import torch
Expand Down Expand Up @@ -111,6 +114,12 @@ def compute_data_metrics(batch: DataProto, use_critic: bool = True, suffix: str
return_diff_var = torch.var(valid_returns - valid_values)
return_var = torch.var(valid_returns)

def _quantiles(tensor, probs):
return [torch.quantile(tensor.float(), p).detach().item() for p in probs]

prompt_q50, prompt_q95 = _quantiles(prompt_length, [0.5, 0.95])
response_q50, response_q95 = _quantiles(response_length, [0.5, 0.95])
Comment on lines +117 to +121
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider extracting the hardcoded quantile values [0.5, 0.95] to named constants at the module level for better maintainability and consistency across the codebase.

Copilot uses AI. Check for mistakes.

metrics = {
# score
"critic/score/mean" + suffix: torch.mean(sequence_score).detach().item(),
Expand Down Expand Up @@ -146,12 +155,16 @@ def compute_data_metrics(batch: DataProto, use_critic: bool = True, suffix: str
"response_length/min" + suffix: torch.min(response_length).detach().item(),
"response_length/clip_ratio"
+ suffix: torch.mean(torch.eq(response_length, max_response_length).float()).detach().item(),
"response_length/p50" + suffix: response_q50,
"response_length/p95" + suffix: response_q95,
# prompt length
"prompt_length/mean" + suffix: torch.mean(prompt_length).detach().item(),
"prompt_length/max" + suffix: torch.max(prompt_length).detach().item(),
"prompt_length/min" + suffix: torch.min(prompt_length).detach().item(),
"prompt_length/clip_ratio"
+ suffix: torch.mean(torch.eq(prompt_length, max_prompt_length).float()).detach().item(),
"prompt_length/p50" + suffix: prompt_q50,
"prompt_length/p95" + suffix: prompt_q95,
}
return metrics

Expand Down Expand Up @@ -186,6 +199,19 @@ def __init__(
self.llm_proxy = llm_proxy
self.adapter = adapter
self.daemon_cls = daemon_cls
self._sequence_log_file = getattr(self.config.trainer, "sequence_log_file", None)
if self._sequence_log_file:
try:
path = Path(self._sequence_log_file)
path.parent.mkdir(parents=True, exist_ok=True)
if not path.exists():
with path.open("w", encoding="utf-8") as f:
f.write(
"step,prompt_mean,prompt_p50,prompt_p95,prompt_max,"
"response_mean,response_p50,response_p95,response_max\n"
)
except Exception:
self._sequence_log_file = None

def _validate(self):
assert len(self.val_dataloader) == 1, "Please set val_batch_size to None for better throughput."
Expand Down Expand Up @@ -236,6 +262,43 @@ def _compute_reference_log_prob(self, batch: DataProto) -> DataProto:
)
return ref_worker.compute_ref_log_prob(batch)

def _select_val_metric(self, val_metrics: dict) -> tuple[str, float | None]:
"""Pick a representative validation metric for logging."""

if not val_metrics:
return "val_reward", None
preferred_keys = [k for k in val_metrics if k.startswith("val-core/")]
fallback_keys = [k for k in val_metrics if k.startswith("val-aux/")]
candidate_keys = preferred_keys or fallback_keys or list(val_metrics.keys())
key = candidate_keys[0]
return key, val_metrics.get(key)

def _log_sequence_window(self, metrics: dict) -> None:
"""Persist compact prompt/response length stats for the current logging window."""

if not self._sequence_log_file:
return
prompt_mean = metrics.get("prompt_length/mean_after_processing", metrics.get("prompt_length/mean"))
prompt_p50 = metrics.get("prompt_length/p50_after_processing", metrics.get("prompt_length/p50"))
prompt_p95 = metrics.get("prompt_length/p95_after_processing", metrics.get("prompt_length/p95"))
prompt_max = metrics.get("prompt_length/max_after_processing", metrics.get("prompt_length/max"))
response_mean = metrics.get("response_length/mean_after_processing", metrics.get("response_length/mean"))
response_p50 = metrics.get("response_length/p50_after_processing", metrics.get("response_length/p50"))
response_p95 = metrics.get("response_length/p95_after_processing", metrics.get("response_length/p95"))
response_max = metrics.get("response_length/max_after_processing", metrics.get("response_length/max"))

if prompt_mean is None or response_mean is None:
return

try:
with open(self._sequence_log_file, "a", encoding="utf-8") as f:
f.write(
f"{self.global_steps},{prompt_mean},{prompt_p50},{prompt_p95},{prompt_max},"
f"{response_mean},{response_p50},{response_p95},{response_max}\n"
)
except Exception:
return

def _train_step(self, batch_dict: dict) -> dict:
# Isolate in a separate method to automatically recycle the variables before validation.
batch: DataProto = DataProto.from_single_dict(batch_dict)
Expand Down Expand Up @@ -481,6 +544,9 @@ def fit(self):

# add tqdm
progress_bar = tqdm(total=self.total_training_steps, initial=self.global_steps, desc="Training Progress")
n_gpus = self.resource_pool_manager.get_n_gpus()
self._cumulative_step_time = 0.0
self._cumulative_step_count = 0

# we start from step 1
self.global_steps += 1
Expand All @@ -506,6 +572,12 @@ def fit(self):
if is_last_step:
last_val_metrics = val_metrics
metrics.update(val_metrics)
# write validation summary to progress log
ts_val = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
key, val = self._select_val_metric(val_metrics)
self._progress_log(
f"[{ts_val}] [val] step {self.global_steps}/{self.total_training_steps} | {key}={val}"
)
Comment on lines +578 to +580
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _progress_log method is called but is never defined. This will result in an AttributeError at runtime. The method needs to be defined in the class, likely writing to a progress log file configured via config.trainer.progress_log_file.

Copilot uses AI. Check for mistakes.

if self.config.trainer.save_freq > 0 and (
is_last_step or self.global_steps % self.config.trainer.save_freq == 0
Expand All @@ -524,6 +596,51 @@ def fit(self):
# TODO: make a canonical logger that supports various backend
logger.log(data=metrics, step=self.global_steps)

# Lightweight console/file summary (mirrors Ray trainer behaviour)
self._log_window.append(
{
"tokens": metrics.get("perf/total_num_tokens", 0),
"step_time": metrics.get("perf/time_per_step", 0.0),
"reward": metrics.get("reward/mean", metrics.get("training/reward", None)),
}
)
Comment on lines +600 to +606
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _log_window attribute is appended to but never initialized in init. This will cause an AttributeError on first access. The attribute should be initialized in the init method along with other instance variables.

Copilot uses AI. Check for mistakes.
# update cumulative step time to stabilize ETA
step_time = metrics.get("perf/time_per_step", None)
if step_time is not None:
self._cumulative_step_time += float(step_time)
self._cumulative_step_count += 1

if self._log_interval > 0 and (self.global_steps % self._log_interval == 0 or is_last_step):
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _log_interval attribute is referenced but never initialized in init. This will cause an AttributeError on first access. The attribute should be initialized in the init method, likely from config.trainer.log_interval.

Copilot uses AI. Check for mistakes.
window = list(self._log_window)
total_tokens = sum(m.get("tokens", 0) or 0 for m in window)
total_time = sum(m.get("step_time", 0.0) or 0.0 for m in window)
avg_step_window = total_time / len(window) if window else 0.0
avg_step_overall = (
self._cumulative_step_time / self._cumulative_step_count
if self._cumulative_step_count > 0
else 0.0
)
throughput = total_tokens / total_time if total_time > 0 else 0.0
throughput_per_gpu = throughput / n_gpus if n_gpus else throughput
reward_vals = [m["reward"] for m in window if m.get("reward") is not None]
reward_avg = sum(reward_vals) / len(reward_vals) if reward_vals else None
eta_sec = (
(self.total_training_steps - self.global_steps) * avg_step_overall
if avg_step_overall > 0
else float("inf")
)
eta_str = time.strftime("%H:%M:%S", time.gmtime(eta_sec)) if eta_sec != float("inf") else "n/a"
lr_val = self.config.actor_rollout_ref.actor.optim.lr
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
line = (
f"[{ts}] [train] step {self.global_steps}/{self.total_training_steps} | "
f"eta {eta_str} | step_win {avg_step_window:.2f}s | step_avg {avg_step_overall:.2f}s | "
f"toks/s {throughput/1e3:.1f}k ({throughput_per_gpu/1e3:.1f}k/gpu) | "
Comment on lines +632 to +638
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic number 1e3 (1000) is used for converting throughput to thousands without explanation. Consider using a named constant like TOKENS_PER_KILOTOKEN = 1000 or adding a comment to clarify the unit conversion.

Copilot uses AI. Check for mistakes.
f"reward {reward_avg if reward_avg is not None else 'n/a'} | lr {lr_val}"
)
self._progress_log(line)
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _progress_log method is called but is never defined. This will result in an AttributeError at runtime. The method needs to be defined in the class, likely writing to a progress log file configured via config.trainer.progress_log_file.

Copilot uses AI. Check for mistakes.
self._log_sequence_window(metrics)

if is_last_step:
pprint(f"Final validation metrics: {last_val_metrics}")
progress_bar.close()
Expand Down
76 changes: 76 additions & 0 deletions examples/spider/configs/doubleGPU_qwen05b.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"algorithm": {
"adv_estimator": "grpo",
"use_kl_in_reward": false
},
"data": {
"train_files": "data/train_spider.parquet",
"val_files": "data/test_dev.parquet",
"train_batch_size": 16,
"max_prompt_length": 4096,
"max_response_length": 2048,
"truncation": "error"
},
"actor_rollout_ref": {
"rollout": {
"tensor_model_parallel_size": 1,
"n": 4,
"log_prob_micro_batch_size_per_gpu": 4,
"multi_turn": {
"format": "hermes"
},
"name": "vllm",
"gpu_memory_utilization": 0.35,
"engine_kwargs": {
"vllm": {
"enable_auto_tool_choice": true,
"tool_call_parser": "hermes"
}
}
},
"actor": {
"ppo_mini_batch_size": 16,
"ppo_micro_batch_size_per_gpu": 1,
"optim": {
"lr": 1e-6
},
"use_kl_loss": false,
"kl_loss_coef": 0.0,
"entropy_coeff": 0,
"clip_ratio_low": 0.2,
"clip_ratio_high": 0.3,
"fsdp_config": {
"param_offload": true,
"optimizer_offload": true
}
},
"ref": {
"log_prob_micro_batch_size_per_gpu": 4,
"fsdp_config": {
"param_offload": false
}
},
"model": {
"path": "/home/lthpc/student/LiTengfei/LLaMA-Factory/models/Qwen2.5-Coder-1.5B-Instruct",
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded absolute path containing user-specific directory makes this configuration non-portable. Consider using a relative path or environment variable substitution to work across different development environments.

Suggested change
"path": "/home/lthpc/student/LiTengfei/LLaMA-Factory/models/Qwen2.5-Coder-1.5B-Instruct",
"path": "${MODEL_DIR}/Qwen2.5-Coder-1.5B-Instruct",

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The filename is 'doubleGPU_qwen05b.json' suggesting a 0.5B model, but the model path points to 'Qwen2.5-Coder-1.5B-Instruct' which is a 1.5B model. This inconsistency between the filename and the actual model configuration could lead to confusion.

Copilot uses AI. Check for mistakes.
"use_remove_padding": true,
"enable_gradient_checkpointing": true
}
},
"trainer": {
"n_gpus_per_node": 2,
"val_before_train": false,
"critic_warmup": 0,
"logger": [
"console",
"wandb"
],
"project_name": "AgentLightning",
"experiment_name": "spider",
"nnodes": 1,
"test_freq": 500,
"total_epochs": 2,
"log_interval": 1,
"save_freq": 500,
"default_local_dir": "/home/lthpc/student/LiTengfei/project/myfork/agent-lightning/examples/spider/ckpt_1.5b_grpo_config2"
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded absolute path containing user-specific directory makes this configuration non-portable. Consider using a relative path or environment variable substitution to work across different development environments.

Copilot uses AI. Check for mistakes.
}
}
76 changes: 76 additions & 0 deletions examples/spider/configs/singleGPU_qwen05b.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"algorithm": {
"adv_estimator": "grpo",
"use_kl_in_reward": false
},
"data": {
"train_files": "data/train_spider.parquet",
"val_files": "data/test_dev.parquet",
"train_batch_size": 16,
"max_prompt_length": 4096,
"max_response_length": 2048,
"truncation": "error"
},
"actor_rollout_ref": {
"rollout": {
"tensor_model_parallel_size": 1,
"n": 4,
"log_prob_micro_batch_size_per_gpu": 2,
"multi_turn": {
"format": "hermes"
},
"name": "vllm",
"gpu_memory_utilization": 0.4,
"engine_kwargs": {
"vllm": {
"enable_auto_tool_choice": true,
"tool_call_parser": "hermes"
}
}
},
"actor": {
"ppo_mini_batch_size": 16,
"ppo_micro_batch_size_per_gpu": 2,
"optim": {
"lr": 1e-06
},
"use_kl_loss": false,
"kl_loss_coef": 0.0,
"entropy_coeff": 0,
"clip_ratio_low": 0.2,
"clip_ratio_high": 0.3,
"fsdp_config": {
"param_offload": false,
"optimizer_offload": true
}
},
"ref": {
"log_prob_micro_batch_size_per_gpu": 2,
"fsdp_config": {
"param_offload": true
}
},
"model": {
"path": "/home/lthpc/student/LiTengfei/LLaMA-Factory/models/Qwen2.5-Coder-0.5B-Instruct",
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded absolute path containing user-specific directory makes this configuration non-portable. Consider using a relative path or environment variable substitution to work across different development environments.

Suggested change
"path": "/home/lthpc/student/LiTengfei/LLaMA-Factory/models/Qwen2.5-Coder-0.5B-Instruct",
"path": "${MODEL_PATH}",

Copilot uses AI. Check for mistakes.
"use_remove_padding": true,
"enable_gradient_checkpointing": true
}
},
"trainer": {
"n_gpus_per_node": 1,
"val_before_train": false,
"critic_warmup": 0,
"logger": [
"console",
"wandb"
],
"project_name": "AgentLightning",
"experiment_name": "spider",
"nnodes": 1,
"test_freq": 500,
"total_epochs": 2,
"log_interval": 1,
"save_freq": 500,
"default_local_dir": "/home/lthpc/student/LiTengfei/project/myfork/agent-lightning/examples/spider/ckpt_0.5b_grpo_config1"
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hardcoded absolute path containing user-specific directory makes this configuration non-portable. Consider using a relative path or environment variable substitution to work across different development environments.

Suggested change
"default_local_dir": "/home/lthpc/student/LiTengfei/project/myfork/agent-lightning/examples/spider/ckpt_0.5b_grpo_config1"
"default_local_dir": "examples/spider/ckpt_0.5b_grpo_config1"

Copilot uses AI. Check for mistakes.
}
}
Loading