From ff11f4591f773cbae1873acf8a8f39509c4e9ad7 Mon Sep 17 00:00:00 2001
From: thc1006 <84045975+thc1006@users.noreply.github.com>
Date: Sun, 28 Jun 2026 22:39:33 +0800
Subject: [PATCH] ENH: seed Monte Carlo per simulation index for
 worker-invariant inputs

MonteCarlo seeded the stochastic models once per worker (parallel) or once
before the loop (serial), so the generated inputs depended on how many workers
ran and which worker drew which simulation. Two runs of the same seed with
different worker counts produced different inputs.

Seed per simulation index instead: spawn one SeedSequence child per index from
the run's root seed, and reseed the stochastic models from child_seeds[i]
before simulation i. SeedSequence.spawn is invariant to the spawn count, so
index i always maps to the same child, including in append mode. Each index
seed is split three ways so the environment, rocket and flight do not share a
stream.

Inputs are now identical across serial, parallel(2) and parallel(N) for a fixed
seed, and a split append run reproduces a single run. This changes the numbers a
fixed seed produces (the per-model decorrelation, plus a serial index that now
counts from 0 like the parallel path already did), so stored baselines
regenerate.

Adds tests/unit/simulation/test_montecarlo_determinism.py covering serial
reproducibility, worker-invariance, append reproducibility, and the None-seed
path. Marked slow (each simulation rebuilds a full rocket) per the existing
test_monte_carlo_simulate convention.
---
 rocketpy/simulation/monte_carlo.py            |  77 +++-
 .../simulation/test_montecarlo_determinism.py | 391 ++++++++++++++++++
 2 files changed, 450 insertions(+), 18 deletions(-)
 create mode 100644 tests/unit/simulation/test_montecarlo_determinism.py

diff --git a/rocketpy/simulation/monte_carlo.py b/rocketpy/simulation/monte_carlo.py
index b6b71e0c3..ae45f85d9 100644
--- a/rocketpy/simulation/monte_carlo.py
+++ b/rocketpy/simulation/monte_carlo.py
@@ -291,16 +291,23 @@ def __run_in_serial(self, random_seed=None):
             start_time=time(),
         )
         inputs_json = ""
+        sim_idx = self._initial_sim_idx
+        # One independent seed per simulation index makes the generated inputs
+        # invariant to the execution mode and to the worker count (see
+        # ``__seed_components``). The ``SeedSequence`` is created fresh here so
+        # that index ``i`` always maps to the same child seed, including in
+        # ``append`` mode (``spawn`` is invariant to the spawn count).
+        child_seeds = np.random.SeedSequence(random_seed).spawn(
+            self.number_of_simulations
+        )
         try:
-            self.environment._set_stochastic(random_seed)
-            self.rocket._set_stochastic(random_seed)
-            self.flight._set_stochastic(random_seed)
             while sim_monitor.keep_simulating():
-                sim_monitor.increment()
+                sim_idx = sim_monitor.increment() - 1
+                self.__seed_components(child_seeds[sim_idx])
 
                 flight = self.__run_single_simulation()
-                inputs_json = self.__evaluate_flight_inputs(sim_monitor.count)
-                outputs_json = self.__evaluate_flight_outputs(flight, sim_monitor.count)
+                inputs_json = self.__evaluate_flight_inputs(sim_idx)
+                outputs_json = self.__evaluate_flight_outputs(flight, sim_idx)
                 self.__append_serial_results(inputs_json, outputs_json)
 
                 sim_monitor.print_update_status()
@@ -312,7 +319,7 @@ def __run_in_serial(self, random_seed=None):
 
         except Exception as error:
             self.__save_serial_error(
-                inputs_json, f"Error on iteration {sim_monitor.count}: {error}"
+                inputs_json, f"Error on iteration {sim_idx}: {error}"
             )
             raise error
 
@@ -359,13 +366,20 @@ def __run_in_parallel(self, random_seed=None, n_workers=None):
             )
 
             processes = []
-            seeds = np.random.SeedSequence(random_seed).spawn(n_workers)
+            # One independent seed per simulation index (not per worker) makes
+            # the generated inputs invariant to ``n_workers``: the worker that
+            # runs simulation ``i`` always seeds from ``child_seeds[i]``,
+            # regardless of how many workers there are. The full list is shared
+            # with every worker; the shared atomic counter assigns indices.
+            child_seeds = np.random.SeedSequence(random_seed).spawn(
+                self.number_of_simulations
+            )
 
-            for seed in seeds:
+            for _ in range(n_workers):
                 sim_producer = multiprocess.Process(
                     target=self.__sim_producer,
                     args=(
-                        seed,
+                        child_seeds,
                         sim_monitor,
                         mutex,
                         simulation_error_event,
@@ -407,13 +421,16 @@ def __validate_number_of_workers(self, n_workers):
             raise ValueError("Number of workers must be at least 2 for parallel mode.")
         return n_workers
 
-    def __sim_producer(self, seed, sim_monitor, mutex, error_event):  # pylint: disable=too-many-statements
+    def __sim_producer(self, child_seeds, sim_monitor, mutex, error_event):  # pylint: disable=too-many-statements
         """Simulation producer to be used in parallel by multiprocessing.
 
         Parameters
         ----------
-        seed : int
-            The seed to set the random number generator.
+        child_seeds : list[numpy.random.SeedSequence]
+            One seed sequence per simulation index. Before each simulation the
+            worker seeds the stochastic models from ``child_seeds[sim_idx]``,
+            where ``sim_idx`` is pulled from the shared atomic counter. This
+            keeps the generated inputs invariant to the number of workers.
         sim_monitor : _SimMonitor
             The simulation monitor object to keep track of the simulations.
         mutex : multiprocess.Lock
@@ -422,15 +439,14 @@ def __sim_producer(self, seed, sim_monitor, mutex, error_event):  # pylint: disa
             Event signaling an error occurred during the simulation.
         """
         try:
-            # Ensure Processes generate different random numbers
-            self.environment._set_stochastic(seed)
-            self.rocket._set_stochastic(seed)
-            self.flight._set_stochastic(seed)
-
             while sim_monitor.keep_simulating():
                 sim_idx = sim_monitor.increment() - 1
                 inputs_json, outputs_json = "", ""
 
+                # Seed per simulation index so the inputs are reproducible and
+                # independent of which worker happens to run this index.
+                self.__seed_components(child_seeds[sim_idx])
+
                 flight = self.__run_single_simulation()
                 inputs_json = self.__evaluate_flight_inputs(sim_idx)
                 outputs_json = self.__evaluate_flight_outputs(flight, sim_idx)
@@ -466,6 +482,31 @@ def __sim_producer(self, seed, sim_monitor, mutex, error_event):  # pylint: disa
             error_event.set()
             mutex.release()
 
+    def __seed_components(self, simulation_seed):
+        """Seed the stochastic models for a single simulation index.
+
+        The given seed sequence is split into three independent sub-streams so
+        that the environment, rocket and flight do not share the same random
+        draws (sharing a single seed would correlate their first sampled
+        values). Seeding per simulation index -- rather than once per worker --
+        is what makes the Monte Carlo inputs invariant to the execution mode
+        (serial vs parallel) and to the number of workers.
+
+        Parameters
+        ----------
+        simulation_seed : numpy.random.SeedSequence
+            The seed sequence assigned to the current simulation index. It is
+            spawned into three child sequences, one per stochastic model.
+
+        Returns
+        -------
+        None
+        """
+        env_seed, rocket_seed, flight_seed = simulation_seed.spawn(3)
+        self.environment._set_stochastic(env_seed)
+        self.rocket._set_stochastic(rocket_seed)
+        self.flight._set_stochastic(flight_seed)
+
     def __run_single_simulation(self):
         """Runs a single simulation and returns the inputs and outputs.
 
diff --git a/tests/unit/simulation/test_montecarlo_determinism.py b/tests/unit/simulation/test_montecarlo_determinism.py
new file mode 100644
index 000000000..2c4629db4
--- /dev/null
+++ b/tests/unit/simulation/test_montecarlo_determinism.py
@@ -0,0 +1,391 @@
+"""Determinism tests for :class:`rocketpy.simulation.MonteCarlo`.
+
+These tests verify that the random *inputs* generated by a Monte Carlo run are
+invariant to the execution mode (serial vs parallel) and to the number of
+parallel workers when a fixed ``random_seed`` is provided. This is the
+worker-invariance guarantee introduced by per-simulation-index seeding: the
+seed of simulation ``i`` is derived from ``i`` only, never from the worker that
+happens to run it.
+
+The trajectory integration (:class:`rocketpy.Flight`) is replaced by a
+lightweight stub so the tests run fast. Worker-invariance is a property of the
+*input sampling*, which happens in ``create_object``/``_randomize_*`` before the
+``Flight`` object is built and is independent of the (expensive) physics
+integration. Stubbing the module-level ``Flight`` symbol propagates to the
+parallel workers because the parallel backend uses the ``fork`` start method, so
+forked workers inherit the patched module.
+
+A dedicated ``stochastic_calisto_numpy_only`` rocket is used so that *all*
+randomness flows through the seeded numpy generator. List-valued stochastic
+attributes are sampled with the standard-library ``random.choice`` (an unseeded
+*global* generator) which the per-index seeding does not control; the fixture
+removes the only such attribute (a multi-element ``thrust_source``) so the
+generated inputs are byte-for-byte reproducible from the seed alone.
+
+These tests are additive and isolated; they do not modify any inherited test.
+"""
+
+import json
+
+import multiprocess
+import pytest
+
+import rocketpy.simulation.monte_carlo as mc_module
+from rocketpy.simulation import MonteCarlo
+from rocketpy.stochastic import StochasticRocket, StochasticSolidMotor
+
+# Every test here drives a real ``MonteCarlo.simulate`` run (the ``Flight``
+# physics is stubbed, but a full Calisto rocket is still rebuilt per simulation),
+# so they are gated behind ``--runslow`` to keep the default suite fast, matching
+# the existing ``test_monte_carlo_simulate`` convention.
+pytestmark = pytest.mark.slow
+
+# The stub-based parallel tests rely on workers inheriting the monkeypatched
+# ``Flight`` symbol, which only happens with the ``fork`` start method.
+requires_fork = pytest.mark.skipif(
+    multiprocess.get_start_method() != "fork",
+    reason="stub-based parallel determinism test requires the 'fork' start method",
+)
+
+
+class _StubFlight:
+    """Minimal stand-in for :class:`rocketpy.Flight` that skips integration.
+
+    The Monte Carlo input sampling happens in
+    ``StochasticRocket.create_object``, ``StochasticEnvironment.create_object``
+    and the ``StochasticFlight`` randomize helpers *before* the ``Flight``
+    object is built. Replacing ``Flight`` with this stub therefore preserves the
+    exact random draws while skipping the costly trajectory integration. Any
+    output attribute requested by the exporter resolves to ``0.0``.
+    """
+
+    def __init__(self, **kwargs):
+        # Accepts the keyword arguments that ``MonteCarlo`` passes to ``Flight``
+        # and intentionally ignores them.
+        pass
+
+    def __getattr__(self, name):
+        return 0.0
+
+
+@pytest.fixture
+def stochastic_calisto_numpy_only(
+    cesaroni_m1670,
+    calisto_robust,
+    stochastic_nose_cone,
+    stochastic_trapezoidal_fins,
+    stochastic_tail,
+    stochastic_rail_buttons,
+    stochastic_main_parachute,
+    stochastic_drogue_parachute,
+):
+    """A ``StochasticRocket`` whose randomness flows entirely through numpy.
+
+    It mirrors the shared ``stochastic_calisto`` fixture but gives the solid
+    motor a single fixed ``thrust_source`` instead of a multi-element list.
+    List-valued stochastic attributes are drawn with the standard-library
+    ``random.choice`` (an unseeded *global* generator) which per-simulation-index
+    seeding does not govern. Removing the only such attribute makes every
+    generated input reproducible from the seed alone, so serial and parallel
+    runs can be compared byte-for-byte.
+
+    Returns
+    -------
+    StochasticRocket
+        A stochastic Calisto rocket with no global-``random`` dependence.
+    """
+    motor = StochasticSolidMotor(
+        solid_motor=cesaroni_m1670,
+        burn_out_time=(4, 0.1),
+        grains_center_of_mass_position=0.001,
+        grain_density=50,
+        grain_separation=1 / 1000,
+        grain_initial_height=1 / 1000,
+        grain_initial_inner_radius=0.375 / 1000,
+        grain_outer_radius=0.375 / 1000,
+        total_impulse=(6500, 1000),
+        throat_radius=0.5 / 1000,
+        nozzle_radius=0.5 / 1000,
+        nozzle_position=0.001,
+    )
+    rocket = StochasticRocket(
+        rocket=calisto_robust,
+        radius=0.0127 / 2000,
+        mass=(15.426, 0.5, "normal"),
+        inertia_11=(6.321, 0),
+        inertia_22=0.01,
+        inertia_33=0.01,
+        center_of_mass_without_motor=0,
+    )
+    rocket.add_motor(motor, position=0.001)
+    rocket.add_nose(stochastic_nose_cone, position=(1.134, 0.001))
+    rocket.add_trapezoidal_fins(stochastic_trapezoidal_fins, position=(0.001, "normal"))
+    rocket.add_tail(stochastic_tail)
+    rocket.set_rail_buttons(
+        stochastic_rail_buttons, lower_button_position=(-0.618, 0.001, "normal")
+    )
+    rocket.add_parachute(stochastic_main_parachute)
+    rocket.add_parachute(stochastic_drogue_parachute)
+    return rocket
+
+
+def _read_inputs_by_index(input_file):
+    """Read a Monte Carlo ``.inputs.txt`` file into ``{index: raw_json_line}``.
+
+    Keying by the ``index`` field makes the comparison robust to the order in
+    which parallel workers append their results.
+
+    Parameters
+    ----------
+    input_file : str or pathlib.Path
+        Path to the Monte Carlo inputs file.
+
+    Returns
+    -------
+    dict[int, str]
+        Mapping of simulation index to the raw (stripped) JSON line written for
+        that simulation.
+    """
+    by_index = {}
+    with open(input_file, mode="r", encoding="utf-8") as rows:
+        for line in rows:
+            line = line.strip()
+            if not line:
+                continue
+            record = json.loads(line)
+            by_index[record["index"]] = line
+    return by_index
+
+
+def _simulate_inputs(
+    monkeypatch,
+    tmp_path,
+    environment,
+    rocket,
+    flight,
+    tag,
+    *,
+    n_simulations,
+    random_seed,
+    parallel,
+    n_workers=None,
+):
+    """Run a Monte Carlo simulation with a stubbed ``Flight`` and return inputs.
+
+    Parameters
+    ----------
+    monkeypatch : pytest.MonkeyPatch
+        Fixture used to swap ``Flight`` for :class:`_StubFlight`.
+    tmp_path : pathlib.Path
+        Temporary directory for the Monte Carlo output files.
+    environment, rocket, flight : StochasticModel
+        Stochastic models shared across runs.
+    tag : str
+        Unique filename stem so successive runs do not clobber each other.
+    n_simulations : int
+        Number of simulations to run.
+    random_seed : int or None
+        Seed forwarded to ``MonteCarlo.simulate``.
+    parallel : bool
+        Whether to run in parallel mode.
+    n_workers : int, optional
+        Number of workers for parallel mode.
+
+    Returns
+    -------
+    dict[int, str]
+        Mapping of simulation index to the raw input JSON line.
+    """
+    monkeypatch.setattr(mc_module, "Flight", _StubFlight)
+    montecarlo = MonteCarlo(
+        filename=str(tmp_path / tag),
+        environment=environment,
+        rocket=rocket,
+        flight=flight,
+    )
+    montecarlo.simulate(
+        number_of_simulations=n_simulations,
+        append=False,
+        parallel=parallel,
+        random_seed=random_seed,
+        n_workers=n_workers,
+    )
+    return _read_inputs_by_index(montecarlo.input_file)
+
+
+def test_montecarlo_inputs_serial_reproducible(
+    monkeypatch,
+    tmp_path,
+    stochastic_environment,
+    stochastic_calisto_numpy_only,
+    stochastic_flight,
+):
+    """Two serial runs with the same seed yield identical inputs per index."""
+    models = (
+        stochastic_environment,
+        stochastic_calisto_numpy_only,
+        stochastic_flight,
+    )
+    run_a = _simulate_inputs(
+        monkeypatch,
+        tmp_path,
+        *models,
+        "serial_a",
+        n_simulations=6,
+        random_seed=20240617,
+        parallel=False,
+    )
+    run_b = _simulate_inputs(
+        monkeypatch,
+        tmp_path,
+        *models,
+        "serial_b",
+        n_simulations=6,
+        random_seed=20240617,
+        parallel=False,
+    )
+
+    assert sorted(run_a) == list(range(6))
+    assert run_a == run_b
+
+
+@requires_fork
+def test_montecarlo_inputs_worker_invariant(
+    monkeypatch,
+    tmp_path,
+    stochastic_environment,
+    stochastic_calisto_numpy_only,
+    stochastic_flight,
+):
+    """Serial == parallel(2) == parallel(N): inputs are bit-identical per index.
+
+    The per-iteration input JSON for a given simulation index must be byte-for-
+    byte identical regardless of whether the run was serial, parallel with two
+    workers, or parallel with a larger number of workers.
+    """
+    n_simulations = 8
+    random_seed = 314159
+    models = (
+        stochastic_environment,
+        stochastic_calisto_numpy_only,
+        stochastic_flight,
+    )
+
+    serial = _simulate_inputs(
+        monkeypatch,
+        tmp_path,
+        *models,
+        "serial",
+        n_simulations=n_simulations,
+        random_seed=random_seed,
+        parallel=False,
+    )
+    parallel_2 = _simulate_inputs(
+        monkeypatch,
+        tmp_path,
+        *models,
+        "parallel_2",
+        n_simulations=n_simulations,
+        random_seed=random_seed,
+        parallel=True,
+        n_workers=2,
+    )
+    parallel_n = _simulate_inputs(
+        monkeypatch,
+        tmp_path,
+        *models,
+        "parallel_n",
+        n_simulations=n_simulations,
+        random_seed=random_seed,
+        parallel=True,
+        n_workers=4,
+    )
+
+    # Every index must be present exactly once in each run.
+    expected_indices = list(range(n_simulations))
+    assert sorted(serial) == expected_indices
+    assert sorted(parallel_2) == expected_indices
+    assert sorted(parallel_n) == expected_indices
+
+    # Worker-invariance: bit-identical input line per index across all modes.
+    for index in expected_indices:
+        assert serial[index] == parallel_2[index], (
+            f"serial vs parallel(2) inputs differ at index {index}"
+        )
+        assert serial[index] == parallel_n[index], (
+            f"serial vs parallel(4) inputs differ at index {index}"
+        )
+
+
+def test_montecarlo_none_seed_runs(
+    monkeypatch,
+    tmp_path,
+    stochastic_environment,
+    stochastic_calisto_numpy_only,
+    stochastic_flight,
+):
+    """``random_seed=None`` stays functional (non-reproducible but complete).
+
+    ``numpy.random.SeedSequence(None)`` pulls fresh entropy, so the run cannot
+    be reproduced, but it must still execute and export one record per index.
+    """
+    inputs = _simulate_inputs(
+        monkeypatch,
+        tmp_path,
+        stochastic_environment,
+        stochastic_calisto_numpy_only,
+        stochastic_flight,
+        "none_seed",
+        n_simulations=5,
+        random_seed=None,
+        parallel=False,
+    )
+
+    assert sorted(inputs) == list(range(5))
+
+
+def test_montecarlo_inputs_append_reproducible(
+    monkeypatch,
+    tmp_path,
+    stochastic_environment,
+    stochastic_calisto_numpy_only,
+    stochastic_flight,
+):
+    """Appending to a partial run reproduces a single full run, index for index.
+
+    ``SeedSequence.spawn`` is invariant to the spawn count, so simulation ``i``
+    draws the same seed whether it was produced in the first batch or in a later
+    ``append`` batch. A 3 + 3 (append) run must therefore match a single run of 6
+    with the same seed. ``number_of_simulations`` is the cumulative target, so the
+    append call passes 6, not the 3 additional simulations.
+    """
+    monkeypatch.setattr(mc_module, "Flight", _StubFlight)
+    seed = 2718281
+    environment = stochastic_environment
+    rocket = stochastic_calisto_numpy_only
+    flight = stochastic_flight
+
+    split = MonteCarlo(
+        filename=str(tmp_path / "split"),
+        environment=environment,
+        rocket=rocket,
+        flight=flight,
+    )
+    split.simulate(number_of_simulations=3, append=False, random_seed=seed)
+    split.simulate(number_of_simulations=6, append=True, random_seed=seed)
+    split_inputs = _read_inputs_by_index(split.input_file)
+
+    single_inputs = _simulate_inputs(
+        monkeypatch,
+        tmp_path,
+        environment,
+        rocket,
+        flight,
+        "single",
+        n_simulations=6,
+        random_seed=seed,
+        parallel=False,
+    )
+
+    assert sorted(split_inputs) == list(range(6))
+    assert split_inputs == single_inputs