diff --git a/rocketpy/simulation/monte_carlo.py b/rocketpy/simulation/monte_carlo.py index b6b71e0c3..ae45f85d9 100644 --- a/rocketpy/simulation/monte_carlo.py +++ b/rocketpy/simulation/monte_carlo.py @@ -291,16 +291,23 @@ def __run_in_serial(self, random_seed=None): start_time=time(), ) inputs_json = "" + sim_idx = self._initial_sim_idx + # One independent seed per simulation index makes the generated inputs + # invariant to the execution mode and to the worker count (see + # ``__seed_components``). The ``SeedSequence`` is created fresh here so + # that index ``i`` always maps to the same child seed, including in + # ``append`` mode (``spawn`` is invariant to the spawn count). + child_seeds = np.random.SeedSequence(random_seed).spawn( + self.number_of_simulations + ) try: - self.environment._set_stochastic(random_seed) - self.rocket._set_stochastic(random_seed) - self.flight._set_stochastic(random_seed) while sim_monitor.keep_simulating(): - sim_monitor.increment() + sim_idx = sim_monitor.increment() - 1 + self.__seed_components(child_seeds[sim_idx]) flight = self.__run_single_simulation() - inputs_json = self.__evaluate_flight_inputs(sim_monitor.count) - outputs_json = self.__evaluate_flight_outputs(flight, sim_monitor.count) + inputs_json = self.__evaluate_flight_inputs(sim_idx) + outputs_json = self.__evaluate_flight_outputs(flight, sim_idx) self.__append_serial_results(inputs_json, outputs_json) sim_monitor.print_update_status() @@ -312,7 +319,7 @@ def __run_in_serial(self, random_seed=None): except Exception as error: self.__save_serial_error( - inputs_json, f"Error on iteration {sim_monitor.count}: {error}" + inputs_json, f"Error on iteration {sim_idx}: {error}" ) raise error @@ -359,13 +366,20 @@ def __run_in_parallel(self, random_seed=None, n_workers=None): ) processes = [] - seeds = np.random.SeedSequence(random_seed).spawn(n_workers) + # One independent seed per simulation index (not per worker) makes + # the generated inputs invariant to ``n_workers``: the worker that + # runs simulation ``i`` always seeds from ``child_seeds[i]``, + # regardless of how many workers there are. The full list is shared + # with every worker; the shared atomic counter assigns indices. + child_seeds = np.random.SeedSequence(random_seed).spawn( + self.number_of_simulations + ) - for seed in seeds: + for _ in range(n_workers): sim_producer = multiprocess.Process( target=self.__sim_producer, args=( - seed, + child_seeds, sim_monitor, mutex, simulation_error_event, @@ -407,13 +421,16 @@ def __validate_number_of_workers(self, n_workers): raise ValueError("Number of workers must be at least 2 for parallel mode.") return n_workers - def __sim_producer(self, seed, sim_monitor, mutex, error_event): # pylint: disable=too-many-statements + def __sim_producer(self, child_seeds, sim_monitor, mutex, error_event): # pylint: disable=too-many-statements """Simulation producer to be used in parallel by multiprocessing. Parameters ---------- - seed : int - The seed to set the random number generator. + child_seeds : list[numpy.random.SeedSequence] + One seed sequence per simulation index. Before each simulation the + worker seeds the stochastic models from ``child_seeds[sim_idx]``, + where ``sim_idx`` is pulled from the shared atomic counter. This + keeps the generated inputs invariant to the number of workers. sim_monitor : _SimMonitor The simulation monitor object to keep track of the simulations. mutex : multiprocess.Lock @@ -422,15 +439,14 @@ def __sim_producer(self, seed, sim_monitor, mutex, error_event): # pylint: disa Event signaling an error occurred during the simulation. """ try: - # Ensure Processes generate different random numbers - self.environment._set_stochastic(seed) - self.rocket._set_stochastic(seed) - self.flight._set_stochastic(seed) - while sim_monitor.keep_simulating(): sim_idx = sim_monitor.increment() - 1 inputs_json, outputs_json = "", "" + # Seed per simulation index so the inputs are reproducible and + # independent of which worker happens to run this index. + self.__seed_components(child_seeds[sim_idx]) + flight = self.__run_single_simulation() inputs_json = self.__evaluate_flight_inputs(sim_idx) outputs_json = self.__evaluate_flight_outputs(flight, sim_idx) @@ -466,6 +482,31 @@ def __sim_producer(self, seed, sim_monitor, mutex, error_event): # pylint: disa error_event.set() mutex.release() + def __seed_components(self, simulation_seed): + """Seed the stochastic models for a single simulation index. + + The given seed sequence is split into three independent sub-streams so + that the environment, rocket and flight do not share the same random + draws (sharing a single seed would correlate their first sampled + values). Seeding per simulation index -- rather than once per worker -- + is what makes the Monte Carlo inputs invariant to the execution mode + (serial vs parallel) and to the number of workers. + + Parameters + ---------- + simulation_seed : numpy.random.SeedSequence + The seed sequence assigned to the current simulation index. It is + spawned into three child sequences, one per stochastic model. + + Returns + ------- + None + """ + env_seed, rocket_seed, flight_seed = simulation_seed.spawn(3) + self.environment._set_stochastic(env_seed) + self.rocket._set_stochastic(rocket_seed) + self.flight._set_stochastic(flight_seed) + def __run_single_simulation(self): """Runs a single simulation and returns the inputs and outputs. diff --git a/tests/unit/simulation/test_montecarlo_determinism.py b/tests/unit/simulation/test_montecarlo_determinism.py new file mode 100644 index 000000000..2c4629db4 --- /dev/null +++ b/tests/unit/simulation/test_montecarlo_determinism.py @@ -0,0 +1,391 @@ +"""Determinism tests for :class:`rocketpy.simulation.MonteCarlo`. + +These tests verify that the random *inputs* generated by a Monte Carlo run are +invariant to the execution mode (serial vs parallel) and to the number of +parallel workers when a fixed ``random_seed`` is provided. This is the +worker-invariance guarantee introduced by per-simulation-index seeding: the +seed of simulation ``i`` is derived from ``i`` only, never from the worker that +happens to run it. + +The trajectory integration (:class:`rocketpy.Flight`) is replaced by a +lightweight stub so the tests run fast. Worker-invariance is a property of the +*input sampling*, which happens in ``create_object``/``_randomize_*`` before the +``Flight`` object is built and is independent of the (expensive) physics +integration. Stubbing the module-level ``Flight`` symbol propagates to the +parallel workers because the parallel backend uses the ``fork`` start method, so +forked workers inherit the patched module. + +A dedicated ``stochastic_calisto_numpy_only`` rocket is used so that *all* +randomness flows through the seeded numpy generator. List-valued stochastic +attributes are sampled with the standard-library ``random.choice`` (an unseeded +*global* generator) which the per-index seeding does not control; the fixture +removes the only such attribute (a multi-element ``thrust_source``) so the +generated inputs are byte-for-byte reproducible from the seed alone. + +These tests are additive and isolated; they do not modify any inherited test. +""" + +import json + +import multiprocess +import pytest + +import rocketpy.simulation.monte_carlo as mc_module +from rocketpy.simulation import MonteCarlo +from rocketpy.stochastic import StochasticRocket, StochasticSolidMotor + +# Every test here drives a real ``MonteCarlo.simulate`` run (the ``Flight`` +# physics is stubbed, but a full Calisto rocket is still rebuilt per simulation), +# so they are gated behind ``--runslow`` to keep the default suite fast, matching +# the existing ``test_monte_carlo_simulate`` convention. +pytestmark = pytest.mark.slow + +# The stub-based parallel tests rely on workers inheriting the monkeypatched +# ``Flight`` symbol, which only happens with the ``fork`` start method. +requires_fork = pytest.mark.skipif( + multiprocess.get_start_method() != "fork", + reason="stub-based parallel determinism test requires the 'fork' start method", +) + + +class _StubFlight: + """Minimal stand-in for :class:`rocketpy.Flight` that skips integration. + + The Monte Carlo input sampling happens in + ``StochasticRocket.create_object``, ``StochasticEnvironment.create_object`` + and the ``StochasticFlight`` randomize helpers *before* the ``Flight`` + object is built. Replacing ``Flight`` with this stub therefore preserves the + exact random draws while skipping the costly trajectory integration. Any + output attribute requested by the exporter resolves to ``0.0``. + """ + + def __init__(self, **kwargs): + # Accepts the keyword arguments that ``MonteCarlo`` passes to ``Flight`` + # and intentionally ignores them. + pass + + def __getattr__(self, name): + return 0.0 + + +@pytest.fixture +def stochastic_calisto_numpy_only( + cesaroni_m1670, + calisto_robust, + stochastic_nose_cone, + stochastic_trapezoidal_fins, + stochastic_tail, + stochastic_rail_buttons, + stochastic_main_parachute, + stochastic_drogue_parachute, +): + """A ``StochasticRocket`` whose randomness flows entirely through numpy. + + It mirrors the shared ``stochastic_calisto`` fixture but gives the solid + motor a single fixed ``thrust_source`` instead of a multi-element list. + List-valued stochastic attributes are drawn with the standard-library + ``random.choice`` (an unseeded *global* generator) which per-simulation-index + seeding does not govern. Removing the only such attribute makes every + generated input reproducible from the seed alone, so serial and parallel + runs can be compared byte-for-byte. + + Returns + ------- + StochasticRocket + A stochastic Calisto rocket with no global-``random`` dependence. + """ + motor = StochasticSolidMotor( + solid_motor=cesaroni_m1670, + burn_out_time=(4, 0.1), + grains_center_of_mass_position=0.001, + grain_density=50, + grain_separation=1 / 1000, + grain_initial_height=1 / 1000, + grain_initial_inner_radius=0.375 / 1000, + grain_outer_radius=0.375 / 1000, + total_impulse=(6500, 1000), + throat_radius=0.5 / 1000, + nozzle_radius=0.5 / 1000, + nozzle_position=0.001, + ) + rocket = StochasticRocket( + rocket=calisto_robust, + radius=0.0127 / 2000, + mass=(15.426, 0.5, "normal"), + inertia_11=(6.321, 0), + inertia_22=0.01, + inertia_33=0.01, + center_of_mass_without_motor=0, + ) + rocket.add_motor(motor, position=0.001) + rocket.add_nose(stochastic_nose_cone, position=(1.134, 0.001)) + rocket.add_trapezoidal_fins(stochastic_trapezoidal_fins, position=(0.001, "normal")) + rocket.add_tail(stochastic_tail) + rocket.set_rail_buttons( + stochastic_rail_buttons, lower_button_position=(-0.618, 0.001, "normal") + ) + rocket.add_parachute(stochastic_main_parachute) + rocket.add_parachute(stochastic_drogue_parachute) + return rocket + + +def _read_inputs_by_index(input_file): + """Read a Monte Carlo ``.inputs.txt`` file into ``{index: raw_json_line}``. + + Keying by the ``index`` field makes the comparison robust to the order in + which parallel workers append their results. + + Parameters + ---------- + input_file : str or pathlib.Path + Path to the Monte Carlo inputs file. + + Returns + ------- + dict[int, str] + Mapping of simulation index to the raw (stripped) JSON line written for + that simulation. + """ + by_index = {} + with open(input_file, mode="r", encoding="utf-8") as rows: + for line in rows: + line = line.strip() + if not line: + continue + record = json.loads(line) + by_index[record["index"]] = line + return by_index + + +def _simulate_inputs( + monkeypatch, + tmp_path, + environment, + rocket, + flight, + tag, + *, + n_simulations, + random_seed, + parallel, + n_workers=None, +): + """Run a Monte Carlo simulation with a stubbed ``Flight`` and return inputs. + + Parameters + ---------- + monkeypatch : pytest.MonkeyPatch + Fixture used to swap ``Flight`` for :class:`_StubFlight`. + tmp_path : pathlib.Path + Temporary directory for the Monte Carlo output files. + environment, rocket, flight : StochasticModel + Stochastic models shared across runs. + tag : str + Unique filename stem so successive runs do not clobber each other. + n_simulations : int + Number of simulations to run. + random_seed : int or None + Seed forwarded to ``MonteCarlo.simulate``. + parallel : bool + Whether to run in parallel mode. + n_workers : int, optional + Number of workers for parallel mode. + + Returns + ------- + dict[int, str] + Mapping of simulation index to the raw input JSON line. + """ + monkeypatch.setattr(mc_module, "Flight", _StubFlight) + montecarlo = MonteCarlo( + filename=str(tmp_path / tag), + environment=environment, + rocket=rocket, + flight=flight, + ) + montecarlo.simulate( + number_of_simulations=n_simulations, + append=False, + parallel=parallel, + random_seed=random_seed, + n_workers=n_workers, + ) + return _read_inputs_by_index(montecarlo.input_file) + + +def test_montecarlo_inputs_serial_reproducible( + monkeypatch, + tmp_path, + stochastic_environment, + stochastic_calisto_numpy_only, + stochastic_flight, +): + """Two serial runs with the same seed yield identical inputs per index.""" + models = ( + stochastic_environment, + stochastic_calisto_numpy_only, + stochastic_flight, + ) + run_a = _simulate_inputs( + monkeypatch, + tmp_path, + *models, + "serial_a", + n_simulations=6, + random_seed=20240617, + parallel=False, + ) + run_b = _simulate_inputs( + monkeypatch, + tmp_path, + *models, + "serial_b", + n_simulations=6, + random_seed=20240617, + parallel=False, + ) + + assert sorted(run_a) == list(range(6)) + assert run_a == run_b + + +@requires_fork +def test_montecarlo_inputs_worker_invariant( + monkeypatch, + tmp_path, + stochastic_environment, + stochastic_calisto_numpy_only, + stochastic_flight, +): + """Serial == parallel(2) == parallel(N): inputs are bit-identical per index. + + The per-iteration input JSON for a given simulation index must be byte-for- + byte identical regardless of whether the run was serial, parallel with two + workers, or parallel with a larger number of workers. + """ + n_simulations = 8 + random_seed = 314159 + models = ( + stochastic_environment, + stochastic_calisto_numpy_only, + stochastic_flight, + ) + + serial = _simulate_inputs( + monkeypatch, + tmp_path, + *models, + "serial", + n_simulations=n_simulations, + random_seed=random_seed, + parallel=False, + ) + parallel_2 = _simulate_inputs( + monkeypatch, + tmp_path, + *models, + "parallel_2", + n_simulations=n_simulations, + random_seed=random_seed, + parallel=True, + n_workers=2, + ) + parallel_n = _simulate_inputs( + monkeypatch, + tmp_path, + *models, + "parallel_n", + n_simulations=n_simulations, + random_seed=random_seed, + parallel=True, + n_workers=4, + ) + + # Every index must be present exactly once in each run. + expected_indices = list(range(n_simulations)) + assert sorted(serial) == expected_indices + assert sorted(parallel_2) == expected_indices + assert sorted(parallel_n) == expected_indices + + # Worker-invariance: bit-identical input line per index across all modes. + for index in expected_indices: + assert serial[index] == parallel_2[index], ( + f"serial vs parallel(2) inputs differ at index {index}" + ) + assert serial[index] == parallel_n[index], ( + f"serial vs parallel(4) inputs differ at index {index}" + ) + + +def test_montecarlo_none_seed_runs( + monkeypatch, + tmp_path, + stochastic_environment, + stochastic_calisto_numpy_only, + stochastic_flight, +): + """``random_seed=None`` stays functional (non-reproducible but complete). + + ``numpy.random.SeedSequence(None)`` pulls fresh entropy, so the run cannot + be reproduced, but it must still execute and export one record per index. + """ + inputs = _simulate_inputs( + monkeypatch, + tmp_path, + stochastic_environment, + stochastic_calisto_numpy_only, + stochastic_flight, + "none_seed", + n_simulations=5, + random_seed=None, + parallel=False, + ) + + assert sorted(inputs) == list(range(5)) + + +def test_montecarlo_inputs_append_reproducible( + monkeypatch, + tmp_path, + stochastic_environment, + stochastic_calisto_numpy_only, + stochastic_flight, +): + """Appending to a partial run reproduces a single full run, index for index. + + ``SeedSequence.spawn`` is invariant to the spawn count, so simulation ``i`` + draws the same seed whether it was produced in the first batch or in a later + ``append`` batch. A 3 + 3 (append) run must therefore match a single run of 6 + with the same seed. ``number_of_simulations`` is the cumulative target, so the + append call passes 6, not the 3 additional simulations. + """ + monkeypatch.setattr(mc_module, "Flight", _StubFlight) + seed = 2718281 + environment = stochastic_environment + rocket = stochastic_calisto_numpy_only + flight = stochastic_flight + + split = MonteCarlo( + filename=str(tmp_path / "split"), + environment=environment, + rocket=rocket, + flight=flight, + ) + split.simulate(number_of_simulations=3, append=False, random_seed=seed) + split.simulate(number_of_simulations=6, append=True, random_seed=seed) + split_inputs = _read_inputs_by_index(split.input_file) + + single_inputs = _simulate_inputs( + monkeypatch, + tmp_path, + environment, + rocket, + flight, + "single", + n_simulations=6, + random_seed=seed, + parallel=False, + ) + + assert sorted(split_inputs) == list(range(6)) + assert split_inputs == single_inputs