From 84c7d0711b93e4a3567ef886127e896cbbb83caf Mon Sep 17 00:00:00 2001 From: Howard Zhao Date: Thu, 14 May 2026 15:25:37 +0800 Subject: [PATCH] Fix: wire temperature through to generation in LocalLLM The temperature param in __init__ was passed to Llama() at load time, but llama-cpp-python applies temperature at sampling time. The value was silently ignored and the library default took over for every call. This stores self.temperature in __init__ and uses it as the default in generate(). Also adds seed=-1 so each load gets fresh randomness, and removes the now-unused temperature kwarg from the Llama(...) call. --- shared/llm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shared/llm.py b/shared/llm.py index 289d6fb..c1a72e0 100644 --- a/shared/llm.py +++ b/shared/llm.py @@ -40,11 +40,12 @@ def __init__( """ self.llm = Llama( model_path=model_path, - temperature=temperature, n_ctx=n_ctx, verbose=False, + seed=-1, ) self.max_tokens = max_tokens + self.temperature = temperature def generate(self, prompt: str, temperature: float = None, stop: list[str] = None) -> str: """ @@ -64,8 +65,7 @@ def generate(self, prompt: str, temperature: float = None, stop: list[str] = Non "stop": stop if stop is not None else ["", "\n\n", "User:", "Assistant:"], } - if temperature is not None: - kwargs["temperature"] = temperature + kwargs["temperature"] = temperature if temperature is not None else self.temperature response = self.llm(**kwargs) return response["choices"][0]["text"].strip() \ No newline at end of file