google-deepmind · irhyl · Feb 21, 2026
diff --git a/gemma/gm/text/_sampling.py b/gemma/gm/text/_sampling.py
@@ -58,6 +58,8 @@ class RandomSampling(SamplingMethod):
 
   @typechecked
   def get_next_tokens(self, logits: Float['*B V'], rng: PRNGKey) -> Int['*B']:
+    if self.temperature < 1e-6:  # ~0 temperature: argmax, skip the division.
+      return jnp.argmax(logits, axis=-1)
     return jax.random.categorical(rng, logits / self.temperature, axis=-1)
 
 
@@ -70,6 +72,9 @@ class TopkSampling(SamplingMethod):
 
   @typechecked
   def get_next_tokens(self, logits: Float['*B V'], rng: PRNGKey) -> Int['*B']:
+    if self.temperature < 1e-6:
+      return jnp.argmax(logits, axis=-1)
+
     logits, batch_shape = enp.flatten(logits, '... V')
 
     batch_size = logits.shape[0]
@@ -91,6 +96,9 @@ class TopPSampling(SamplingMethod):
 
   @typechecked
   def get_next_tokens(self, logits: Float['... V'], rng: PRNGKey) -> Int['...']:
+    if self.temperature < 1e-6:
+      return jnp.argmax(logits, axis=-1)
+
     # temperature scaling
     logits = logits / self.temperature
 
@@ -115,4 +123,3 @@ def get_next_tokens(self, logits: Float['... V'], rng: PRNGKey) -> Int['...']:
       )
 
     return jax.random.categorical(rng, logits, axis=-1)
-
diff --git a/gemma/gm/text/_sampling_test.py b/gemma/gm/text/_sampling_test.py
@@ -79,3 +79,35 @@ def test_top1_sampling_matches_greedy_sampling():
   tokens_top1 = top1_sampling.get_next_tokens(logits, rng)
   np.testing.assert_array_equal(tokens_greedy, tokens_top1)
 
+
+def test_random_sampling_zero_temperature():
+  """RandomSampling with temperature=0.0 must agree with Greedy decoding."""
+  logits = jax.numpy.array([[1.0, 3.0, 2.0]])
+  rng = jax.random.PRNGKey(0)
+  zero_temp_sampler = gm.text.RandomSampling(temperature=0.0)
+  greedy_sampler = gm.text.Greedy()
+  actual = zero_temp_sampler.get_next_tokens(logits, rng)
+  expected = greedy_sampler.get_next_tokens(logits, rng)
+  np.testing.assert_array_equal(actual, expected)
+
+
+def test_topk_sampling_zero_temperature():
+  """TopkSampling with temperature=0.0 must agree with Greedy decoding."""
+  logits = jax.numpy.array([[1.0, 3.0, 2.0, 0.5, 0.1]])
+  rng = jax.random.PRNGKey(0)
+  zero_temp_sampler = gm.text.TopkSampling(k=3, temperature=0.0)
+  greedy_sampler = gm.text.Greedy()
+  actual = zero_temp_sampler.get_next_tokens(logits, rng)
+  expected = greedy_sampler.get_next_tokens(logits, rng)
+  np.testing.assert_array_equal(actual, expected)
+
+
+def test_topp_sampling_zero_temperature():
+  """TopPSampling with temperature=0.0 must agree with Greedy decoding."""
+  logits = jax.numpy.array([[1.0, 3.0, 2.0, 0.5, 0.1]])
+  rng = jax.random.PRNGKey(0)
+  zero_temp_sampler = gm.text.TopPSampling(p=0.9, temperature=0.0)
+  greedy_sampler = gm.text.Greedy()
+  actual = zero_temp_sampler.get_next_tokens(logits, rng)
+  expected = greedy_sampler.get_next_tokens(logits, rng)
+  np.testing.assert_array_equal(actual, expected)