diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 50909494..00000000 Binary files a/.DS_Store and /dev/null differ diff --git a/README.md b/README.md index 2041f489..d0a54a6e 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,8 @@ pip install -e . # Optional: Install with RouterR1 support (requires GPU) # RouterR1 is tested with vllm==0.6.3 (torch==2.4.0); the extra pins these versions. +# NOTE: these pins have no wheels for Python >= 3.14 — use Python 3.10–3.13 for the +# [router-r1]/[all] extras. The base package supports Python 3.10–3.14. pip install -e ".[router-r1]" # Optional: Install all optional dependencies @@ -130,6 +132,13 @@ pip install -e ".[all]" pip install llmrouter-lib ``` +> **🔒 Security note — choose Python 3.10–3.13 for production.** `litellm` (the LLM API-calling +> layer) dropped Python 3.14 support at 1.83.8, so on 3.14 it is capped at 1.83.7, which carries known +> CVEs and pins vulnerable transitive deps. On **Python 3.10–3.13** the dependency floor resolves +> `litellm` to its fully-patched line and `pip-audit` reports no known vulnerabilities. Python 3.14 is +> supported for development; for security-sensitive deployments use 3.10–3.13 until `litellm` restores +> 3.14 support. See [.agent-reviews/dependency-audit.md](.agent-reviews/dependency-audit.md). + ### 🔑 Setting Up API Keys LLMRouter requires API keys to make LLM API calls for inference, chat, and data generation. Set the `API_KEYS` environment variable using one of the following formats: diff --git a/custom_routers/randomrouter/router.py b/custom_routers/randomrouter/router.py index 94bd075f..8b242c1e 100644 --- a/custom_routers/randomrouter/router.py +++ b/custom_routers/randomrouter/router.py @@ -10,7 +10,7 @@ """ import random -from typing import Any, Dict, List, Union +from typing import Any, Dict, List import torch.nn as nn from llmrouter.models.meta_router import MetaRouter diff --git a/custom_routers/thresholdrouter/router.py b/custom_routers/thresholdrouter/router.py index 5b5846e8..ae23b5a8 100644 --- a/custom_routers/thresholdrouter/router.py +++ b/custom_routers/thresholdrouter/router.py @@ -5,7 +5,6 @@ from typing import Any, Dict, List import torch import torch.nn as nn -import numpy as np from llmrouter.models.meta_router import MetaRouter @@ -129,9 +128,9 @@ def route_single(self, query_input: Dict[str, Any]) -> Dict[str, Any]: if not isinstance(embedding, torch.Tensor): embedding = torch.tensor(embedding, dtype=torch.float32) elif hasattr(self, 'query_embeddings') and 'query' in query_input: - # Try to get from loaded embeddings (if available) - query = query_input['query'] + # Try to get from loaded embeddings (if available). # This is a simplified version - real implementation would hash or lookup + # the query embedding here; for now we require an explicit 'embedding'. raise ValueError( "Query embedding not provided. " "Pass 'embedding' in query_input or implement embedding generation." diff --git a/llmrouter/cli/router_chat.py b/llmrouter/cli/router_chat.py index 121ab72c..1c7387a3 100644 --- a/llmrouter/cli/router_chat.py +++ b/llmrouter/cli/router_chat.py @@ -9,7 +9,7 @@ import argparse import os import yaml -from typing import Dict, Any, Optional +from typing import Any, Optional import gradio as gr diff --git a/llmrouter/data/api_calling_evaluation.py b/llmrouter/data/api_calling_evaluation.py index 712d39f5..89e4072d 100644 --- a/llmrouter/data/api_calling_evaluation.py +++ b/llmrouter/data/api_calling_evaluation.py @@ -21,19 +21,16 @@ import sys import time import json -import ast import re import argparse import yaml -from typing import Dict, List, Tuple, Optional, Union +from typing import Dict, List from collections import defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed -import threading from pathlib import Path import pandas as pd import numpy as np -import torch from tqdm import tqdm # Allow importing local helper packages under repo `data/` (e.g., `human_eval`, `mbpp`) @@ -45,16 +42,13 @@ # Import utils from llmrouter.utils import ( setup_environment, - format_mc_prompt, format_gsm8k_prompt, format_math_prompt, - format_commonsense_qa_prompt, format_mbpp_prompt, format_humaneval_prompt, - generate_task_query, ProgressTracker, to_tensor, clean_df, - process_final_data, call_api + generate_task_query, ProgressTracker, call_api ) from llmrouter.utils.data_processing import process_unified_embeddings_and_routing from llmrouter.data.data_loader import DataLoader # Import evaluation functions -from llmrouter.utils import f1_score, exact_match_score, get_bert_score, evaluate_code, cem_score +from llmrouter.utils import f1_score, exact_match_score, get_bert_score, cem_score from llmrouter.utils.evaluation import last_boxed_only_string, remove_boxed, is_equiv try: from human_eval.evaluate_functional_correctness import entry_point_item @@ -321,7 +315,7 @@ def eval_perf(metric, prediction, ground_truth, task_name, task_id=None): answer = remove_boxed(string_in_last_boxed) if is_equiv(answer, ground_truth_processed): return 1 - except Exception as e: + except Exception: return 0 return 0 diff --git a/llmrouter/data/data.py b/llmrouter/data/data.py index a08a07b8..7d595597 100644 --- a/llmrouter/data/data.py +++ b/llmrouter/data/data.py @@ -53,11 +53,11 @@ ``` """ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional from abc import ABC, abstractmethod from enum import Enum -from pydantic import BaseModel, ConfigDict, Field, field_validator +from pydantic import BaseModel, Field, field_validator class DataFormatType(Enum): diff --git a/llmrouter/data/data_generation.py b/llmrouter/data/data_generation.py index 30d783ba..640dfa9c 100644 --- a/llmrouter/data/data_generation.py +++ b/llmrouter/data/data_generation.py @@ -31,18 +31,13 @@ import json import argparse import yaml -from pathlib import Path -from typing import Dict, List, Tuple, Optional, Union -from collections import defaultdict -import pandas as pd import numpy as np -from tqdm import tqdm from datasets import load_dataset # Import utils from llmrouter.utils import ( - setup_environment, TASK_DESCRIPTIONS, CASE_NUM + setup_environment, CASE_NUM ) from llmrouter.data.data_loader import DataLoader from llmrouter.data import batch_vlm_describe_images diff --git a/llmrouter/data/data_loader.py b/llmrouter/data/data_loader.py index fcd2c57e..8aade1f6 100644 --- a/llmrouter/data/data_loader.py +++ b/llmrouter/data/data_loader.py @@ -1,6 +1,6 @@ import os import json -from llmrouter.utils import load_csv, load_jsonl, jsonl_to_csv, load_pt +from llmrouter.utils import load_jsonl, jsonl_to_csv, load_pt def load_json_file(path: str): diff --git a/llmrouter/data/generate_llm_embeddings.py b/llmrouter/data/generate_llm_embeddings.py index 4c4c1120..e8406e3a 100644 --- a/llmrouter/data/generate_llm_embeddings.py +++ b/llmrouter/data/generate_llm_embeddings.py @@ -18,8 +18,7 @@ import json import argparse import yaml -from pathlib import Path -from typing import Dict, List +from typing import Dict from llmrouter.utils import setup_environment, get_longformer_embedding from llmrouter.data.data_loader import DataLoader diff --git a/llmrouter/models/__init__.py b/llmrouter/models/__init__.py index 131d2ade..afcfdd40 100644 --- a/llmrouter/models/__init__.py +++ b/llmrouter/models/__init__.py @@ -1,107 +1,70 @@ -from .meta_router import MetaRouter -from .base_trainer import BaseTrainer - -from .smallest_llm import SmallestLLM -from .largest_llm import LargestLLM - -from .knnrouter import KNNRouter -from .knnrouter import KNNRouterTrainer - -from .svmrouter import SVMRouter -from .svmrouter import SVMRouterTrainer - -from .mlprouter import MLPRouter -from .mlprouter import MLPTrainer - -from .mfrouter import MFRouter -from .mfrouter import MFRouterTrainer - -from .elorouter import EloRouter -from .elorouter import EloRouterTrainer - -from .automix import AutomixRouter -from .automix import AutomixRouterTrainer - -from .routerdc import DCRouter -from .routerdc import DCTrainer - -from .hybrid_llm import HybridLLMRouter -from .hybrid_llm import HybridLLMTrainer - -try: - from .graphrouter import GraphRouter - from .graphrouter import GraphTrainer -except Exception: - GraphRouter = None - GraphTrainer = None - -try: - from .causallm_router import CausalLMRouter - from .causallm_router import CausalLMTrainer -except Exception: - CausalLMRouter = None - CausalLMTrainer = None - -try: - from .router_r1 import RouterR1 -except Exception: - RouterR1 = None - -try: - from .gmtrouter import GMTRouter - from .gmtrouter import GMTRouterTrainer -except Exception: - GMTRouter = None - GMTRouterTrainer = None - -try: - from .personalizedrouter import PersonalizedRouter - from .personalizedrouter import PersonalizedRouterTrainer -except Exception: - PersonalizedRouter = None - PersonalizedRouterTrainer = None - -__all__ = [ - "MetaRouter", - "BaseTrainer", - "SmallestLLM", - "LargestLLM", - - "KNNRouter", - "KNNRouterTrainer", - - "SVMRouter", - "SVMRouterTrainer", - - "MLPRouter", - "MLPTrainer", - - "MFRouter", - "MFRouterTrainer", - - "EloRouter", - "EloRouterTrainer", - - "DCRouter", - "DCTrainer", - - "AutomixRouter", - "AutomixRouterTrainer", - - "HybridLLMRouter", - "HybridLLMTrainer", - - "GraphRouter", - "GraphTrainer", - - "CausalLMRouter", - "CausalLMTrainer", - - "RouterR1", - - "GMTRouter", - "GMTRouterTrainer", - - "PersonalizedRouter", - "PersonalizedRouterTrainer", -] +"""Router and trainer registry. + +Names are resolved **lazily** (PEP 562 ``__getattr__``) so that +``import llmrouter.models`` does not eagerly import torch / torch-geometric / +transformers / peft for every router. Importing a specific name — e.g. +``from llmrouter.models import KNNRouter`` — loads only that router's submodule +(and hence only its dependencies). Optional routers whose heavy dependencies are +unavailable resolve to ``None``, preserving the previous behavior. +""" +import importlib + +# public attribute name -> (submodule, is_optional) +# optional entries resolve to None when their (heavy/GPU) dependencies are missing. +_REGISTRY = { + "MetaRouter": ("meta_router", False), + "BaseTrainer": ("base_trainer", False), + "SmallestLLM": ("smallest_llm", False), + "LargestLLM": ("largest_llm", False), + "KNNRouter": ("knnrouter", False), + "KNNRouterTrainer": ("knnrouter", False), + "SVMRouter": ("svmrouter", False), + "SVMRouterTrainer": ("svmrouter", False), + "MLPRouter": ("mlprouter", False), + "MLPTrainer": ("mlprouter", False), + "MFRouter": ("mfrouter", False), + "MFRouterTrainer": ("mfrouter", False), + "EloRouter": ("elorouter", False), + "EloRouterTrainer": ("elorouter", False), + "AutomixRouter": ("automix", False), + "AutomixRouterTrainer": ("automix", False), + "DCRouter": ("routerdc", False), + "DCTrainer": ("routerdc", False), + "HybridLLMRouter": ("hybrid_llm", False), + "HybridLLMTrainer": ("hybrid_llm", False), + "GraphRouter": ("graphrouter", True), + "GraphTrainer": ("graphrouter", True), + "CausalLMRouter": ("causallm_router", True), + "CausalLMTrainer": ("causallm_router", True), + "RouterR1": ("router_r1", True), + "GMTRouter": ("gmtrouter", True), + "GMTRouterTrainer": ("gmtrouter", True), + "PersonalizedRouter": ("personalizedrouter", True), + "PersonalizedRouterTrainer": ("personalizedrouter", True), +} + +__all__ = list(_REGISTRY) + + +def __getattr__(name): + """Lazily import and cache a registered router/trainer on first access.""" + try: + submodule, optional = _REGISTRY[name] + except KeyError: + raise AttributeError( + f"module {__name__!r} has no attribute {name!r}" + ) from None + try: + module = importlib.import_module(f".{submodule}", __name__) + value = getattr(module, name) + except Exception: + if optional: + value = None # optional router with missing deps -> None (legacy behavior) + else: + raise + globals()[name] = value # cache so later lookups bypass __getattr__ + return value + + +def __dir__(): + return sorted(__all__) diff --git a/llmrouter/models/automix/model.py b/llmrouter/models/automix/model.py index d3611901..38a55074 100644 --- a/llmrouter/models/automix/model.py +++ b/llmrouter/models/automix/model.py @@ -10,7 +10,7 @@ Adapted for LLMRouter framework with PyTorch nn.Module interface. """ -from typing import List, Union +from typing import List import numpy as np import pandas as pd @@ -230,11 +230,9 @@ def safe_max_key(d): """Find key with maximum value, handling inf/-inf.""" max_val = float('-inf') max_key = None - has_inf = False for k, v in d.items(): if isinstance(v, float): if v == float('inf'): - has_inf = True max_key = k # Prefer inf over finite values break if v > max_val: diff --git a/llmrouter/models/automix/router.py b/llmrouter/models/automix/router.py index beaa6592..d21f25af 100644 --- a/llmrouter/models/automix/router.py +++ b/llmrouter/models/automix/router.py @@ -12,16 +12,13 @@ import yaml import json import pandas as pd -import copy from typing import Any, Dict, List, Tuple, Optional -import torch.nn as nn from llmrouter.models.meta_router import MetaRouter -from llmrouter.utils import call_api, generate_task_query, calculate_task_performance +from llmrouter.utils import generate_task_query, calculate_task_performance from .model import AutomixModel from .methods import Threshold, POMDP, SelfConsistency -from .data_pipeline import prepare_automix_data def parse_size(size_str: str) -> float: @@ -182,7 +179,6 @@ def _prepare_data(self): tuple: (train_df, test_df) - Training and test DataFrames with all required columns """ data_cfg = self.cfg["data_path"] - hparam = self.cfg["hparam"] # Get paths for routing data train_path = data_cfg.get("routing_data_train") @@ -249,7 +245,7 @@ def _prepare_data(self): # Import data pipeline function from .data_pipeline import init_providers, run_solver_job, prepare_row from .data_pipeline import run_verification, compute_fraction_correct - from .data_pipeline import clean_answer, calculate_f1_for_models, categorize_rows + from .data_pipeline import calculate_f1_for_models, categorize_rows # Initialize API providers init_providers() @@ -455,30 +451,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No """ from .data_pipeline import prepare_row, run_solver_job, run_verification, compute_fraction_correct - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] - - # Get API endpoint from config (Note: Automix uses its own data_pipeline, so this is not currently used) - api_endpoint = self.cfg.get("api_endpoint") + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Automix routing - call small model first query_df = pd.DataFrame([{'query': original_query}]) diff --git a/llmrouter/models/automix/trainer.py b/llmrouter/models/automix/trainer.py index a147e313..d083ccd1 100644 --- a/llmrouter/models/automix/trainer.py +++ b/llmrouter/models/automix/trainer.py @@ -8,8 +8,6 @@ """ import torch -import pandas as pd -from typing import Any from llmrouter.models.base_trainer import BaseTrainer diff --git a/llmrouter/models/causallm_router/router.py b/llmrouter/models/causallm_router/router.py index bcf8a3ad..a8910d68 100644 --- a/llmrouter/models/causallm_router/router.py +++ b/llmrouter/models/causallm_router/router.py @@ -1,6 +1,5 @@ from typing import Any, Dict, List, Optional import os -import torch import torch.nn as nn import copy from llmrouter.models.meta_router import MetaRouter @@ -213,15 +212,9 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No """ self._load_vllm_model() - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] # Build prompts for all queries (for routing only) prompts = [] @@ -239,15 +232,7 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No query_data_output = [] for i, row in enumerate(query_data): # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - # Use task_name from row if available, otherwise use parameter - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Get routed model name generated_text = outputs[i].outputs[0].text diff --git a/llmrouter/models/causallm_router/trainer.py b/llmrouter/models/causallm_router/trainer.py index 88d40846..6a39dca2 100644 --- a/llmrouter/models/causallm_router/trainer.py +++ b/llmrouter/models/causallm_router/trainer.py @@ -1,6 +1,5 @@ import os import torch -from typing import Optional from datasets import Dataset from transformers import ( AutoModelForCausalLM, diff --git a/llmrouter/models/elorouter/router.py b/llmrouter/models/elorouter/router.py index 25fa56de..085f783e 100644 --- a/llmrouter/models/elorouter/router.py +++ b/llmrouter/models/elorouter/router.py @@ -102,27 +102,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No # Select best model once (same for all queries) best_model = self._select_best_model() - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route - always use best model model_name = best_model diff --git a/llmrouter/models/gmtrouter/data_loader.py b/llmrouter/models/gmtrouter/data_loader.py index 0ae75aa0..0ec4bd26 100644 --- a/llmrouter/models/gmtrouter/data_loader.py +++ b/llmrouter/models/gmtrouter/data_loader.py @@ -8,8 +8,7 @@ import json import torch import numpy as np -from typing import Dict, List, Tuple, Any, Optional -from pathlib import Path +from typing import Dict, List, Tuple, Any from collections import defaultdict from enum import Enum from pydantic import ValidationError @@ -278,7 +277,6 @@ def _build_graph_from_interactions(self, interactions: List[Dict]): for conv_turn in conversation: query_text = conv_turn['query'] query_emb = np.array(conv_turn.get('query_emb', [])) - response_text = conv_turn.get('response', '') rating = conv_turn.get('rating', 0.0) # Set embedding dimension diff --git a/llmrouter/models/gmtrouter/models.py b/llmrouter/models/gmtrouter/models.py index 68c360c9..d7b86751 100644 --- a/llmrouter/models/gmtrouter/models.py +++ b/llmrouter/models/gmtrouter/models.py @@ -9,7 +9,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -from typing import Dict, List, Tuple, Optional +from typing import Dict, List, Tuple class HeteroGNN(nn.Module): diff --git a/llmrouter/models/gmtrouter/router.py b/llmrouter/models/gmtrouter/router.py index c3fd5a15..85c38c2f 100644 --- a/llmrouter/models/gmtrouter/router.py +++ b/llmrouter/models/gmtrouter/router.py @@ -11,11 +11,10 @@ Training and inference are fully integrated into LLMRouter CLI. """ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Tuple import os import json import torch -import numpy as np from llmrouter.models.meta_router import MetaRouter from llmrouter.models.gmtrouter.data_loader import GMTRouterDataLoader, detect_data_format diff --git a/llmrouter/models/gmtrouter/trainer.py b/llmrouter/models/gmtrouter/trainer.py index 350a8401..495737a9 100644 --- a/llmrouter/models/gmtrouter/trainer.py +++ b/llmrouter/models/gmtrouter/trainer.py @@ -7,11 +7,9 @@ """ import torch -import torch.nn as nn import torch.nn.functional as F import os -from typing import Dict, List, Tuple, Any, Optional -from tqdm import tqdm +from typing import Dict, Tuple import numpy as np from llmrouter.models.base_trainer import BaseTrainer diff --git a/llmrouter/models/graphrouter/router.py b/llmrouter/models/graphrouter/router.py index 2f06097e..d17821ed 100644 --- a/llmrouter/models/graphrouter/router.py +++ b/llmrouter/models/graphrouter/router.py @@ -349,15 +349,9 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No state_dict = torch.load(load_model_path, map_location='cpu') self.gnn_predictor.model.load_state_dict(state_dict) - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] # Prepare test embeddings test_embeddings = [] @@ -427,14 +421,7 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No query_data_output = [] for i, row in enumerate(query_data): # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Get routed model name model_idx = test_predictions[i].item() diff --git a/llmrouter/models/hybrid_llm/router.py b/llmrouter/models/hybrid_llm/router.py index 9302e55c..903fd59f 100644 --- a/llmrouter/models/hybrid_llm/router.py +++ b/llmrouter/models/hybrid_llm/router.py @@ -1,7 +1,6 @@ from typing import Any, Dict, List, Optional import os import numpy as np -import torch import torch.nn as nn import copy @@ -230,28 +229,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No load_path = os.path.join(project_root, self.cfg["model_path"]["load_model_path"]) self.mlp_model = load_model(load_path) - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - # Use task_name from row if available, otherwise use parameter - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route the query to get model_name emb = [get_longformer_embedding(original_query).numpy()] diff --git a/llmrouter/models/hybrid_llm/trainer.py b/llmrouter/models/hybrid_llm/trainer.py index ea3d2bfb..b1d55671 100644 --- a/llmrouter/models/hybrid_llm/trainer.py +++ b/llmrouter/models/hybrid_llm/trainer.py @@ -1,5 +1,4 @@ import os -import torch from llmrouter.models.base_trainer import BaseTrainer from llmrouter.utils import save_model, load_model diff --git a/llmrouter/models/knnmultiroundrouter/router.py b/llmrouter/models/knnmultiroundrouter/router.py index e6e3c772..47e1d2e0 100644 --- a/llmrouter/models/knnmultiroundrouter/router.py +++ b/llmrouter/models/knnmultiroundrouter/router.py @@ -279,28 +279,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No - "task_performance": evaluation score (0.0-1.0) if ground truth available - "success": whether the pipeline succeeded """ - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - # Use task_name from row if available, otherwise use parameter - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Format query if task_name is provided if row_task_name: diff --git a/llmrouter/models/knnmultiroundrouter/trainer.py b/llmrouter/models/knnmultiroundrouter/trainer.py index 7293f5b3..4410977b 100644 --- a/llmrouter/models/knnmultiroundrouter/trainer.py +++ b/llmrouter/models/knnmultiroundrouter/trainer.py @@ -1,4 +1,3 @@ -import torch from llmrouter.models.base_trainer import BaseTrainer from llmrouter.utils import save_model, load_model import os diff --git a/llmrouter/models/knnrouter/router.py b/llmrouter/models/knnrouter/router.py index 21fc97f6..79502d3c 100644 --- a/llmrouter/models/knnrouter/router.py +++ b/llmrouter/models/knnrouter/router.py @@ -1,8 +1,5 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import os -import pickle -import random -import numpy as np import torch.nn as nn import copy from sklearn.neighbors import KNeighborsClassifier @@ -132,28 +129,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No load_model_path = os.path.join(project_root, self.cfg["model_path"]["load_model_path"]) self.knn_model = load_model(load_model_path) - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - # Use task_name from row if available, otherwise use parameter - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route the query to get model_name query_embedding = [get_longformer_embedding(original_query).numpy()] diff --git a/llmrouter/models/knnrouter/trainer.py b/llmrouter/models/knnrouter/trainer.py index a631a2f1..30a53c07 100644 --- a/llmrouter/models/knnrouter/trainer.py +++ b/llmrouter/models/knnrouter/trainer.py @@ -1,4 +1,3 @@ -import torch from llmrouter.models.base_trainer import BaseTrainer from llmrouter.utils import save_model, load_model import os diff --git a/llmrouter/models/largest_llm/router.py b/llmrouter/models/largest_llm/router.py index 6add1438..452fb833 100644 --- a/llmrouter/models/largest_llm/router.py +++ b/llmrouter/models/largest_llm/router.py @@ -159,27 +159,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No key=lambda k: parse_size(self.llm_data[k].get("size", "0B")), ) - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route - always use largest model model_name = largest_model_name diff --git a/llmrouter/models/llmmultiroundrouter/router.py b/llmrouter/models/llmmultiroundrouter/router.py index 9c188395..fd6ea205 100644 --- a/llmrouter/models/llmmultiroundrouter/router.py +++ b/llmrouter/models/llmmultiroundrouter/router.py @@ -1,5 +1,4 @@ from typing import Any, Dict, List, Optional -import os import torch.nn as nn import copy from llmrouter.models.meta_router import MetaRouter @@ -199,28 +198,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No - "task_performance": evaluation score (0.0-1.0) if ground truth available - "success": whether the pipeline succeeded """ - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - # Use task_name from row if available, otherwise use parameter - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Format query if task_name is provided if row_task_name: diff --git a/llmrouter/models/meta_router.py b/llmrouter/models/meta_router.py index 2b31c8dc..4c847c43 100644 --- a/llmrouter/models/meta_router.py +++ b/llmrouter/models/meta_router.py @@ -1,3 +1,4 @@ +import copy import os import yaml from abc import ABC, abstractmethod @@ -98,6 +99,43 @@ def route_single(self, batch): """ raise NotImplementedError + # ------------------------------------------------------------------ + # Shared helpers for concrete routers + # ------------------------------------------------------------------ + + def _resolve_query_data(self, batch): + """Resolve the rows to route. + + Uses an explicit ``batch`` when provided, otherwise falls back to the + loaded ``query_data_test``. Returns a list of rows, or ``None`` when + neither source is available (callers should then return an empty result). + """ + if batch is not None: + return batch if isinstance(batch, list) else [batch] + if getattr(self, "query_data_test", None) is not None: + return copy.copy(self.query_data_test) + print("Warning: No batch provided and no test data available for batch routing.") + return None + + @staticmethod + def _normalize_row(row, task_name): + """Normalize one routing input row. + + Returns ``(row_copy, original_query, row_task_name)``. A dict row is + shallow-copied and its ``query``/``task_name`` read out; any non-dict + row is wrapped as ``{"query": str(row)}``. The per-row task name falls + back to the batch-level ``task_name`` when absent. + """ + if isinstance(row, dict): + row_copy = copy.copy(row) + original_query = row_copy.get("query", "") + row_task_name = row_copy.get("task_name", task_name) + else: + row_copy = {"query": str(row)} + original_query = str(row) + row_task_name = task_name + return row_copy, original_query, row_task_name + def forward(self, batch): """ PyTorch-compatible forward method. diff --git a/llmrouter/models/mfrouter/router.py b/llmrouter/models/mfrouter/router.py index 9f41e75f..546b77d3 100644 --- a/llmrouter/models/mfrouter/router.py +++ b/llmrouter/models/mfrouter/router.py @@ -1,6 +1,5 @@ from typing import Any, Dict, List, Optional import os -import numpy as np import torch import torch.nn as nn import torch.nn.functional as F @@ -165,27 +164,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No self.load_model_path = os.path.join(project_root, self.cfg["model_path"]["load_model_path"]) model = self._load_mf_model() - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route the query q_emb = self.embed_query(original_query).to(model.device) diff --git a/llmrouter/models/mlprouter/router.py b/llmrouter/models/mlprouter/router.py index efca28ba..af6bf7a0 100644 --- a/llmrouter/models/mlprouter/router.py +++ b/llmrouter/models/mlprouter/router.py @@ -1,6 +1,5 @@ from typing import Any, Dict, List, Optional import os -import numpy as np import torch import torch.nn as nn import torch.nn.functional as F @@ -212,27 +211,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No self.load_model_path = os.path.join(project_root, self.cfg["model_path"]["load_model_path"]) model, model_type = self._load_mlp_model() - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route the query query_embedding = get_longformer_embedding(original_query) diff --git a/llmrouter/models/personalizedrouter/graph_nn.py b/llmrouter/models/personalizedrouter/graph_nn.py index dad4df1a..f1ac01ec 100644 --- a/llmrouter/models/personalizedrouter/graph_nn.py +++ b/llmrouter/models/personalizedrouter/graph_nn.py @@ -7,10 +7,6 @@ import torch.nn as nn from torch.optim import AdamW from sklearn.metrics import f1_score -import numpy as np -from collections import Counter -import random -import math class FeatureAlign(nn.Module): @@ -208,11 +204,25 @@ def train_validate(self,data,data_validate,data_for_test): f1 = f1_score(label_idx_, observe_idx_, average='macro') loss_validate = self.criterion(predicted_edges_validate.reshape(-1), data_validate.label[mask_validate].reshape(-1)) - if f1>=best_f1: + # Model selection is driven by validation f1 only. When validation + # improves we checkpoint the model and record THAT checkpoint's test + # score as the reported result. We intentionally do NOT take a max() + # over per-epoch test scores: doing so selects the best epoch by + # peeking at the test set, producing an optimistic, leakage-biased + # number that also wouldn't match the saved (best-validation) model. + improved = f1 >= best_f1 + if improved: best_f1 = f1 torch.save(self.model.state_dict(), self.save_path) - test_result,test_loss=self.test(data_for_test,self.config['model_path']) - best_test_result = max(best_test_result, test_result) + test_result, test_loss = self.test(data_for_test, self.config['model_path']) + best_test_result = test_result + + print( + f"[PersonalizedRouter] epoch {epoch + 1}/{self.config['train_epoch']} | " + f"val_acc={validate_accuracy:.4f} val_loss={loss_validate.item():.4f} " + f"val_f1={f1:.4f} | best_val_f1={best_f1:.4f} test@best_val={best_test_result:.4f}" + + (" *" if improved else "") + ) self.best_test_result = best_test_result def test(self,data,model_path): diff --git a/llmrouter/models/router_r1/router.py b/llmrouter/models/router_r1/router.py index 8411104b..4c357102 100644 --- a/llmrouter/models/router_r1/router.py +++ b/llmrouter/models/router_r1/router.py @@ -1,6 +1,5 @@ import os import re -import copy from typing import Any, Dict, List, Optional import torch @@ -199,7 +198,10 @@ def route_single(self, query: Dict[str, Any], return_details: bool = False): from vllm import LLM, SamplingParams except ImportError as e: raise ImportError( - "RouterR1 requires the optional dependency `vllm`. Install it with: `pip install vllm`." + "RouterR1 requires the optional dependency `vllm` (GPU only). " + "Install it with `pip install -e \".[router-r1]\"`. " + "Note: vllm==0.6.3 has no wheels for Python >= 3.14 — use Python " + "3.10–3.13 for Router-R1. All other routers support Python 3.10–3.14." ) from e if not torch.cuda.is_available(): @@ -334,27 +336,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No list of dict: A list of query dictionaries with response, tokens, and performance metrics. """ - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route using RouterR1's agentic reasoning # Note: RouterR1 doesn't assign a specific model_name since it's an agentic system diff --git a/llmrouter/models/routerdc/dcdata_utils.py b/llmrouter/models/routerdc/dcdata_utils.py index 2ba66458..48c876d3 100644 --- a/llmrouter/models/routerdc/dcdata_utils.py +++ b/llmrouter/models/routerdc/dcdata_utils.py @@ -7,9 +7,8 @@ """ import json -import os import numpy as np -from collections import Counter, defaultdict +from collections import defaultdict from sklearn.cluster import KMeans from typing import List, Dict @@ -127,10 +126,6 @@ def add_clusters(data: List[Dict], n_clusters: int = 3, random_state: int = 42) for i, item in enumerate(data): item['cluster_id'] = int(cluster_labels[i]) - # Print cluster distribution - cluster_dist = Counter(cluster_labels) - # print(f" Cluster distribution: {dict(cluster_dist)}") - return data diff --git a/llmrouter/models/routerdc/router.py b/llmrouter/models/routerdc/router.py index 8cda95a5..5b6714dd 100644 --- a/llmrouter/models/routerdc/router.py +++ b/llmrouter/models/routerdc/router.py @@ -15,7 +15,7 @@ import copy import torch from typing import Any, Dict, List, Optional -from transformers import AutoTokenizer, DebertaV2Model, DebertaV2Tokenizer +from transformers import DebertaV2Model, DebertaV2Tokenizer from llmrouter.models.meta_router import MetaRouter from llmrouter.utils import call_api, generate_task_query, calculate_task_performance from .dcmodel import RouterModule diff --git a/llmrouter/models/routerdc/trainer.py b/llmrouter/models/routerdc/trainer.py index 11c93ade..e936f283 100644 --- a/llmrouter/models/routerdc/trainer.py +++ b/llmrouter/models/routerdc/trainer.py @@ -13,7 +13,6 @@ import os import json import torch -import torch.nn as nn from torch.utils.data import DataLoader from tqdm import tqdm diff --git a/llmrouter/models/smallest_llm/router.py b/llmrouter/models/smallest_llm/router.py index 930fbc3c..123f6a37 100644 --- a/llmrouter/models/smallest_llm/router.py +++ b/llmrouter/models/smallest_llm/router.py @@ -158,27 +158,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No key=lambda k: parse_size(self.llm_data[k].get("size", "0B")), ) - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route - always use smallest model model_name = smallest_model_name diff --git a/llmrouter/models/svmrouter/router.py b/llmrouter/models/svmrouter/router.py index 626533c7..aa639107 100644 --- a/llmrouter/models/svmrouter/router.py +++ b/llmrouter/models/svmrouter/router.py @@ -1,7 +1,5 @@ from typing import Any, Dict, List, Optional import os -import pickle -import numpy as np import torch.nn as nn import copy from sklearn.svm import SVC @@ -115,27 +113,14 @@ def route_batch(self, batch: Optional[Any] = None, task_name: Optional[str] = No load_model_path = os.path.join(project_root, self.cfg["model_path"]["load_model_path"]) self.svm_model = load_model(load_model_path) - # Determine which data to use - if batch is not None: - query_data = batch if isinstance(batch, list) else [batch] - else: - if hasattr(self, "query_data_test") and self.query_data_test is not None: - query_data = copy.copy(self.query_data_test) - else: - print("Warning: No batch provided and no test data available for batch routing.") - return [] + query_data = self._resolve_query_data(batch) + if query_data is None: + return [] query_data_output = [] for row in query_data: # Handle both dict and non-dict inputs - if isinstance(row, dict): - row_copy = copy.copy(row) - original_query = row_copy.get("query", "") - row_task_name = row_copy.get("task_name", task_name) - else: - row_copy = {"query": str(row)} - original_query = str(row) - row_task_name = task_name + row_copy, original_query, row_task_name = self._normalize_row(row, task_name) # Step 1: Route the query query_embedding = [get_longformer_embedding(original_query).numpy()] diff --git a/llmrouter/models/svmrouter/trainer.py b/llmrouter/models/svmrouter/trainer.py index faa076eb..63d5b5ca 100644 --- a/llmrouter/models/svmrouter/trainer.py +++ b/llmrouter/models/svmrouter/trainer.py @@ -1,4 +1,3 @@ -import torch from llmrouter.models.base_trainer import BaseTrainer from llmrouter.utils import save_model, load_model import os diff --git a/llmrouter/serve/server.py b/llmrouter/serve/server.py index 6e96b03d..0dc2d47d 100644 --- a/llmrouter/serve/server.py +++ b/llmrouter/serve/server.py @@ -15,10 +15,7 @@ import json import os import sys -import re -import time -import uuid -from typing import AsyncGenerator, Optional, Dict, Any, List +from typing import AsyncGenerator, Optional, Dict, List # FastAPI try: diff --git a/llmrouter/utils/data_convert.py b/llmrouter/utils/data_convert.py index 7f814146..bb6d18c1 100644 --- a/llmrouter/utils/data_convert.py +++ b/llmrouter/utils/data_convert.py @@ -37,9 +37,21 @@ } # Import prompt template from centralized prompts module +from functools import lru_cache + from llmrouter.prompts import load_prompt_template -PROMPT_TEMPLATE = load_prompt_template("data_conversion") + +@lru_cache(maxsize=1) +def _get_prompt_template() -> str: + """Load the data-conversion prompt template on first use. + + Loaded lazily (and cached) rather than at import time so that importing this + module never performs disk I/O — importing it previously raised + FileNotFoundError wherever the template path didn't resolve (e.g. a + non-editable install). It is only needed when actually converting data. + """ + return load_prompt_template("data_conversion") def normalize_model_name(model_name: str) -> str: @@ -119,7 +131,7 @@ def generate_id(index: int, task_name: str = "default") -> str: def generate_prompt(question: str) -> List[Dict]: """Generate prompt format""" - content = PROMPT_TEMPLATE.format(question=question) + content = _get_prompt_template().format(question=question) return [{"content": content, "role": "user"}] diff --git a/llmrouter/utils/dataframe_utils.py b/llmrouter/utils/dataframe_utils.py index 492745c9..f4eaa72a 100644 --- a/llmrouter/utils/dataframe_utils.py +++ b/llmrouter/utils/dataframe_utils.py @@ -2,7 +2,6 @@ DataFrame processing utilities for LLMRouter scripts """ -import pandas as pd def clean_df(df): """Clean and standardize DataFrame columns""" diff --git a/llmrouter/utils/evaluation.py b/llmrouter/utils/evaluation.py index ac2b788d..b4951b2d 100644 --- a/llmrouter/utils/evaluation.py +++ b/llmrouter/utils/evaluation.py @@ -4,7 +4,7 @@ import string import pickle from collections import Counter -from typing import Any, List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple import numpy as np @@ -355,7 +355,7 @@ def strip_string(string): # remove percentage string = string.replace("\\%", "") - string = string.replace("\%", "") # noqa: W605 + string = string.replace("\\%", "") # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string string = string.replace(" .", " 0.") diff --git a/llmrouter/utils/model_loader.py b/llmrouter/utils/model_loader.py index 87f09bf3..9f05388f 100644 --- a/llmrouter/utils/model_loader.py +++ b/llmrouter/utils/model_loader.py @@ -1,7 +1,6 @@ -import os import pickle import torch -from typing import Any, Union +from typing import Any from pathlib import Path diff --git a/llmrouter/utils/setup.py b/llmrouter/utils/setup.py index 1649467b..604814e3 100644 --- a/llmrouter/utils/setup.py +++ b/llmrouter/utils/setup.py @@ -3,7 +3,6 @@ """ import os -import sys def setup_environment(): """Setup common environment variables and paths""" diff --git a/openclaw_router/__main__.py b/openclaw_router/__main__.py index 920134cb..f6b91f4a 100644 --- a/openclaw_router/__main__.py +++ b/openclaw_router/__main__.py @@ -10,7 +10,6 @@ """ import argparse -import sys from .server import create_app, run_server from .config import OpenClawConfig diff --git a/openclaw_router/media.py b/openclaw_router/media.py index 86f238d3..9b53fe07 100644 --- a/openclaw_router/media.py +++ b/openclaw_router/media.py @@ -14,9 +14,9 @@ import httpx import os import tempfile -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple # Try to import optional video processing library try: diff --git a/openclaw_router/memory.py b/openclaw_router/memory.py index ab4162b4..7a30cce5 100644 --- a/openclaw_router/memory.py +++ b/openclaw_router/memory.py @@ -16,10 +16,9 @@ import json import os import threading -from dataclasses import asdict from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional import numpy as np diff --git a/openclaw_router/server.py b/openclaw_router/server.py index cb952744..b0643bd9 100755 --- a/openclaw_router/server.py +++ b/openclaw_router/server.py @@ -11,7 +11,6 @@ """ import json -import os import re import sys from typing import AsyncGenerator, Optional, Dict, Any, List @@ -35,7 +34,7 @@ except ImportError: from config import OpenClawConfig, LLMConfig, MODELS_WITHOUT_SYSTEM_ROLE, MODEL_CONTEXT_LIMITS from routers import OpenClawRouter, _safe_log - from media import process_multimodal_content, MediaConfig + from media import process_multimodal_content # ============================================================ @@ -489,7 +488,6 @@ async def chat_completions(request: ChatRequest): # Extract user query for routing (with optional media understanding) user_query = "" - media_description = None # Find and process the last user message last_user_idx = None @@ -510,7 +508,6 @@ async def chat_completions(request: ChatRequest): raw_content, config.media, fallback_key=together_key ) user_query = processed_text[:500] - media_description = media_desc if media_desc: print(f"[Media] Processed: {media_desc[:80]}...") # IMPORTANT: Replace the message content with processed text diff --git a/pyproject.toml b/pyproject.toml index 9fce4708..18378341 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,11 @@ dependencies = [ "datasets>=2.14", "pydantic>=2.0", "gradio>=4.0", - "litellm>=1.0", + # Security: CVE-2026-42203/42208/42271 fixed in 1.83.7, CVE-2026-40217 in 1.83.10, + # CVE-2026-49468 in 1.84.0. litellm dropped Python 3.14 support at 1.83.8, so on + # 3.14 the ceiling is 1.83.7 (3 of 5 fixed); 3.10–3.13 get the fully-patched line. + "litellm>=1.84.0; python_version < '3.14'", + "litellm>=1.83.7; python_version >= '3.14'", "peft>=0.7", "torch-geometric>=2.3", "scipy>=1.10", @@ -42,6 +46,9 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Development Status :: 3 - Alpha", @@ -56,10 +63,24 @@ classifiers = [ "Bug Tracker" = "https://github.com/ulab-uiuc/LLMRouter/issues" [project.optional-dependencies] -# RouterR1 requires vLLM (GPU only) -router-r1 = ["vllm==0.6.3", "torch==2.4.0", "openai>=1.0"] +# RouterR1 requires vLLM (GPU only). +# vllm==0.6.3 / torch==2.4.0 have no wheels for Python >= 3.14, so those pins are +# gated to Python < 3.14 via environment markers. This keeps `pip install .[router-r1]` +# (and `.[all]`) from failing dependency resolution on 3.14; the GPU stack is simply +# not installed there. Router-R1 itself supports Python 3.10–3.13 only — on 3.14 it +# raises a clear ImportError at use time (see llmrouter/models/router_r1/router.py). +# The base package (and all other routers) supports Python 3.10–3.14. +router-r1 = [ + "vllm==0.6.3; python_version < '3.14'", + "torch==2.4.0; python_version < '3.14'", + "openai>=1.0", +] # All optional dependencies -all = ["vllm==0.6.3", "torch==2.4.0", "openai>=1.0"] +all = [ + "vllm==0.6.3; python_version < '3.14'", + "torch==2.4.0; python_version < '3.14'", + "openai>=1.0", +] [project.scripts] llmrouter = "llmrouter.cli.router_main:main" diff --git a/scripts/memory_benchmark.py b/scripts/memory_benchmark.py new file mode 100644 index 00000000..fae6f775 --- /dev/null +++ b/scripts/memory_benchmark.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +"""Memory benchmark for LLMRouter. + +Measures peak resident memory (RSS) for a set of import scenarios, each run in a +fresh subprocess so footprints don't accumulate. The headline metric is the peak +RSS of importing the full router suite (`import llmrouter.models`) — i.e. what it +costs to load the library before doing any routing. + +Usage: + python scripts/memory_benchmark.py # human-readable table + python scripts/memory_benchmark.py --json # machine-readable (for the loop) + python scripts/memory_benchmark.py --target 450 # exit 0 if headline <= 450 MB +""" +import argparse +import json +import platform +import resource +import statistics +import subprocess +import sys + +# scenario name -> import statement(s) to execute before measuring peak RSS +SCENARIOS = { + "interpreter": "pass", + "import llmrouter": "import llmrouter", + "import llmrouter.models": "import llmrouter.models", + "import cli.router_inference": "from llmrouter.cli import router_inference", + "import openclaw_router.server": "import openclaw_router.server", +} +HEADLINE = "import llmrouter.models" +REPEATS = 3 # median of N subprocess runs to damp noise + + +def _peak_rss_mb(code: str) -> float: + snippet = ( + "import resource, platform\n" + f"{code}\n" + "rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss\n" + # ru_maxrss is bytes on macOS, kilobytes on Linux + "rss = rss/1048576 if platform.system() == 'Darwin' else rss/1024\n" + "print(rss)\n" + ) + out = subprocess.check_output( + [sys.executable, "-c", snippet], text=True, stderr=subprocess.DEVNULL + ) + return float(out.strip()) + + +def run() -> dict: + results = {} + for name, code in SCENARIOS.items(): + samples = [_peak_rss_mb(code) for _ in range(REPEATS)] + results[name] = round(statistics.median(samples), 1) + return results + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--json", action="store_true", help="emit JSON") + ap.add_argument("--target", type=float, default=None, + help="headline RSS (MB) target; exit 0 if met") + args = ap.parse_args() + + results = run() + headline = results[HEADLINE] + interp = results["interpreter"] + + if args.json: + print(json.dumps({ + "scenarios_mb": results, + "headline_metric": HEADLINE, + "headline_mb": headline, + "library_overhead_mb": round(headline - interp, 1), + "python": platform.python_version(), + }, indent=2)) + else: + print(f"\n=== LLMRouter memory benchmark (peak RSS, median of {REPEATS}) ===") + print(f"Python {platform.python_version()} on {platform.system()}\n") + for name, mb in results.items(): + bar = "#" * int(mb / 15) + print(f" {name:<32} {mb:>7.1f} MB {bar}") + print(f"\n HEADLINE ({HEADLINE}): {headline:.1f} MB") + print(f" library overhead over bare interpreter: {headline - interp:.1f} MB") + + if args.target is not None: + ok = headline <= args.target + print(f"\n target {args.target:.1f} MB -> {'MET' if ok else 'NOT met'} " + f"(headline {headline:.1f} MB)") + return 0 if ok else 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_meta_router_helpers.py b/tests/test_meta_router_helpers.py new file mode 100644 index 00000000..fc29d912 --- /dev/null +++ b/tests/test_meta_router_helpers.py @@ -0,0 +1,122 @@ +"""Regression tests for the shared MetaRouter helpers. + +Locks the behavior of `MetaRouter._resolve_query_data` and `_normalize_row` +(extracted from 14 routers in commits 5507b09 / 51440e5) and proves they are +equivalent to the original inline blocks they replaced. + +Runnable directly (`python tests/test_meta_router_helpers.py`) or via pytest. +""" +import copy + +import torch.nn as nn + +from llmrouter.models.meta_router import MetaRouter + + +class _DummyRouter(MetaRouter): + """Minimal concrete MetaRouter (no YAML/data) for unit-testing helpers.""" + + def route_batch(self, batch): # pragma: no cover - not exercised + return [] + + def route_single(self, batch): # pragma: no cover - not exercised + return None + + +# --- reference implementations: the ORIGINAL inline logic, verbatim --------- +def _orig_resolve(self, batch): + if batch is not None: + query_data = batch if isinstance(batch, list) else [batch] + else: + if hasattr(self, "query_data_test") and self.query_data_test is not None: + query_data = copy.copy(self.query_data_test) + else: + return [] # original returned [] directly (helper returns None -> caller returns []) + return query_data + + +def _orig_normalize(row, task_name): + if isinstance(row, dict): + row_copy = copy.copy(row) + original_query = row_copy.get("query", "") + row_task_name = row_copy.get("task_name", task_name) + else: + row_copy = {"query": str(row)} + original_query = str(row) + row_task_name = task_name + return row_copy, original_query, row_task_name + + +def _make(): + return _DummyRouter(model=nn.Identity()) + + +def test_resolve_explicit_batch(): + r = _make() + assert r._resolve_query_data({"query": "q"}) == [{"query": "q"}] # dict -> wrapped + assert r._resolve_query_data([{"a": 1}, {"b": 2}]) == [{"a": 1}, {"b": 2}] # list -> as-is + + +def test_resolve_fallback_to_test_set(): + r = _make() + r.query_data_test = [{"query": "x"}] + out = r._resolve_query_data(None) + assert out == [{"query": "x"}] + assert out is not r.query_data_test # must be a copy, not the same object + + +def test_resolve_no_data_returns_none(): + r = _make() # no query_data_test attribute set + assert r._resolve_query_data(None) is None + r.query_data_test = None + assert r._resolve_query_data(None) is None + + +def test_resolve_empty_test_set_edge(): + r = _make() + r.query_data_test = [] # falsy but not None -> resolves to [] (not the None branch) + assert r._resolve_query_data(None) == [] + + +def test_normalize_dict_and_non_dict(): + r = _make() + assert r._normalize_row({"query": "hi", "task_name": "t1"}, "fallback") == ( + {"query": "hi", "task_name": "t1"}, "hi", "t1", + ) + assert r._normalize_row({"query": "hi"}, "fallback") == ( + {"query": "hi"}, "hi", "fallback", # task_name falls back + ) + assert r._normalize_row("raw string", "fallback") == ( + {"query": "raw string"}, "raw string", "fallback", + ) + + +def test_equivalence_to_original_inline_logic(): + """The crux: helpers must match the original blocks across an input matrix.""" + r = _make() + + # _resolve_query_data: None-result <=> original []-result; else equal. + resolve_cases = [{"query": "a"}, [{"query": "b"}], [], None] + for qdt in ([{"query": "t"}], [], None, "MISSING"): + if qdt == "MISSING": + if hasattr(r, "query_data_test"): + del r.query_data_test + else: + r.query_data_test = qdt + for batch in resolve_cases: + new = r._resolve_query_data(batch) + old = _orig_resolve(r, batch) + # map helper's None -> [] to compare with original's [] sentinel + assert (new if new is not None else []) == old, (qdt, batch, new, old) + + # _normalize_row: must be identical across dict/non-dict/missing-keys. + for row in [{"query": "q", "task_name": "t"}, {"query": "q"}, {}, "s", 42, None]: + assert r._normalize_row(row, "fb") == _orig_normalize(row, "fb"), row + + +if __name__ == "__main__": + fns = [v for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)] + for fn in fns: + fn() + print(f" PASS {fn.__name__}") + print(f"\n{len(fns)} tests passed")