diff --git a/src/bw_processing/datapackage.py b/src/bw_processing/datapackage.py index 8d3ca1e..4db2615 100644 --- a/src/bw_processing/datapackage.py +++ b/src/bw_processing/datapackage.py @@ -511,6 +511,7 @@ def add_persistent_vector_from_iterator( indices_array, distributions_array, flip_array, + rescale_array, ) = resolve_dict_iterator(dict_iterator, nrows) self.add_persistent_vector( matrix=matrix, @@ -520,6 +521,7 @@ def add_persistent_vector_from_iterator( indices_array=indices_array, flip_array=flip_array, distributions_array=distributions_array, + rescale_array=rescale_array, matrix_serialize_format_type=matrix_serialize_format_type, **kwargs, ) @@ -534,7 +536,8 @@ def add_entries( """Add matrix data from a list of :class:`MatrixEntry` objects. High-level convenience method that does not require working directly - with NumPy arrays. + with NumPy arrays. If any entry has a ``rescale`` value other than + ``1.0``, the rescale values are stored as a ``rescale_array`` resource. Args: matrix: Name of the target matrix (e.g. ``"technosphere"``). @@ -557,7 +560,8 @@ def add_array_entries( """Add matrix data from a list of :class:`.ArrayEntry` objects. Each :class:`.ArrayEntry` becomes one persistent-array resource group. - Resource group names are auto-generated. + Resource group names are auto-generated. If an entry has a ``rescale`` + array it is stored as a ``rescale_array`` resource (``kind="rescale"``). Args: matrix: Name of the target matrix (e.g. ``"technosphere"``). @@ -572,6 +576,7 @@ def add_array_entries( indices_array=indices, data_array=entry.data, flip_array=entry.flip, + rescale_array=entry.rescale, ) def add_persistent_vector( @@ -583,7 +588,7 @@ def add_persistent_vector( data_array: Optional[np.ndarray] = None, flip_array: Optional[np.ndarray] = None, distributions_array: Optional[np.ndarray] = None, - scale_array: Optional[np.ndarray] = None, + rescale_array: Optional[np.ndarray] = None, params_array: Optional[np.ndarray] = None, param_labels: Optional[list] = None, param_label_schema: Optional[AnyLabelSchema] = None, @@ -593,7 +598,7 @@ def add_persistent_vector( ) -> None: """Add a persistent vector resource group to the datapackage. - ``scale_array`` is an optional 1-D float array of the same length as + ``rescale_array`` is an optional 1-D float array of the same length as ``indices_array``. Each element is a multiplicative factor applied to the corresponding data value — whether static or stochastic — before the value is inserted into the matrix. Typical uses are allocation @@ -699,9 +704,9 @@ def add_persistent_vector( meta_type="generic", **kwargs, ) - if scale_array is not None: - self._add_scale_array_resource( - scale_array=scale_array, + if rescale_array is not None: + self._add_rescale_array_resource( + rescale_array=rescale_array, indices_array=indices_array, name=name, keep_proxy=keep_proxy, @@ -745,7 +750,7 @@ def add_persistent_array( indices_array: np.ndarray, name: Optional[str] = None, flip_array: Optional[np.ndarray] = None, - scale_array: Optional[np.ndarray] = None, + rescale_array: Optional[np.ndarray] = None, params_array: Optional[np.ndarray] = None, param_labels: Optional[list] = None, param_label_schema: Optional[AnyLabelSchema] = None, @@ -755,7 +760,7 @@ def add_persistent_array( ) -> None: """Add a persistent array resource group to the datapackage. - ``scale_array`` is an optional 1-D float array of the same length as + ``rescale_array`` is an optional 1-D float array of the same length as ``indices_array``. Each element is a multiplicative factor applied to the corresponding data value — whether static or stochastic — before the value is inserted into the matrix. Typical uses are allocation @@ -835,9 +840,9 @@ def add_persistent_array( meta_type="generic", **kwargs, ) - if scale_array is not None: - self._add_scale_array_resource( - scale_array=scale_array, + if rescale_array is not None: + self._add_rescale_array_resource( + rescale_array=rescale_array, indices_array=indices_array, name=name, keep_proxy=keep_proxy, @@ -906,7 +911,7 @@ def write_modified(self): if kind == "indices": meta_object = "vector" meta_type = "indices" - elif kind in ("flip", "scale", "params"): + elif kind in ("flip", "rescale", "params"): meta_object = "vector" meta_type = "generic" elif kind == "distributions": @@ -947,32 +952,32 @@ def write_modified(self): self._modified = set() - def _add_scale_array_resource( + def _add_rescale_array_resource( self, *, - scale_array: np.ndarray, + rescale_array: np.ndarray, indices_array: np.ndarray, name: str, keep_proxy: bool, matrix_serialize_format_type: Optional[MatrixSerializeFormat], **kwargs, ) -> None: - scale_array = load_bytes(scale_array) - if not np.issubdtype(scale_array.dtype, np.floating): + rescale_array = load_bytes(rescale_array) + if not np.issubdtype(rescale_array.dtype, np.floating): raise WrongDatatype( - "`scale_array` dtype is {}, but must be a float dtype".format(scale_array.dtype) + "`rescale_array` dtype is {}, but must be a float dtype".format(rescale_array.dtype) ) - elif scale_array.shape != indices_array.shape: + elif rescale_array.shape != indices_array.shape: raise ShapeMismatch( - "`scale_array` shape ({}) doesn't match `indices_array` ({}).".format( - scale_array.shape, indices_array.shape + "`rescale_array` shape ({}) doesn't match `indices_array` ({}).".format( + rescale_array.shape, indices_array.shape ) ) self._add_numpy_array_resource( - array=scale_array, + array=rescale_array, group=name, - name=name + ".scale", - kind="scale", + name=name + ".rescale", + kind="rescale", keep_proxy=keep_proxy, matrix_serialize_format_type=matrix_serialize_format_type, meta_object="vector", @@ -1126,7 +1131,7 @@ def add_dynamic_vector( indices_array: np.ndarray, # Not interface name: Optional[str] = None, flip_array: Optional[np.ndarray] = None, # Not interface - scale_array: Optional[np.ndarray] = None, # Not interface + rescale_array: Optional[np.ndarray] = None, # Not interface params_array: Optional[np.ndarray] = None, # Not interface param_labels: Optional[list] = None, param_label_schema: Optional[AnyLabelSchema] = None, @@ -1140,7 +1145,7 @@ def add_dynamic_vector( stored on disk. ``interface`` must implement ``__next__()`` and return a 1-D numpy array of length ``len(indices_array)`` each time it is called. - The ``indices_array``, optional ``flip_array``, optional ``scale_array``, + The ``indices_array``, optional ``flip_array``, optional ``rescale_array``, and optional ``params_array`` are static and are stored as normal numpy resources. See ``add_persistent_vector`` for documentation of the ``params_array``, ``param_labels``, and ``param_label_schema`` arguments. @@ -1154,7 +1159,7 @@ def add_dynamic_vector( name: Optional resource group name; auto-generated if omitted. flip_array: Optional boolean array; where ``True`` the value is multiplied by ``-1`` before insertion. - scale_array: Optional 1-D float array of multiplicative factors + rescale_array: Optional 1-D float array of multiplicative factors applied before matrix insertion. keep_proxy: If ``True``, store a proxy rather than the raw array for on-disk resources. @@ -1203,9 +1208,9 @@ def add_dynamic_vector( meta_type="generic", **kwargs, ) - if scale_array is not None: - self._add_scale_array_resource( - scale_array=scale_array, + if rescale_array is not None: + self._add_rescale_array_resource( + rescale_array=rescale_array, indices_array=indices_array, name=name, keep_proxy=keep_proxy, @@ -1259,7 +1264,7 @@ def add_dynamic_array( indices_array: np.ndarray, # Not interface name: Optional[str] = None, flip_array: Optional[np.ndarray] = None, - scale_array: Optional[np.ndarray] = None, # Not interface + rescale_array: Optional[np.ndarray] = None, # Not interface params_array: Optional[np.ndarray] = None, # Not interface param_labels: Optional[list] = None, param_label_schema: Optional[AnyLabelSchema] = None, @@ -1275,7 +1280,7 @@ def add_dynamic_array( array for ``args[1]``. ``ncols`` may be ``None`` for an infinite interface. - The ``indices_array``, optional ``flip_array``, optional ``scale_array``, + The ``indices_array``, optional ``flip_array``, optional ``rescale_array``, and optional ``params_array`` are static and are stored as normal numpy resources. For dynamic arrays the column count of ``params_array`` is not validated against the interface (whose column count may be unknown at @@ -1291,7 +1296,7 @@ def add_dynamic_array( name: Optional resource group name; auto-generated if omitted. flip_array: Optional boolean array; where ``True`` the value is multiplied by ``-1`` before insertion. - scale_array: Optional 1-D float array of multiplicative factors + rescale_array: Optional 1-D float array of multiplicative factors applied before matrix insertion. keep_proxy: If ``True``, store a proxy rather than the raw array for on-disk resources. @@ -1343,9 +1348,9 @@ def add_dynamic_array( meta_type="generic", **kwargs, ) - if scale_array is not None: - self._add_scale_array_resource( - scale_array=scale_array, + if rescale_array is not None: + self._add_rescale_array_resource( + rescale_array=rescale_array, indices_array=indices_array, name=name, keep_proxy=keep_proxy, diff --git a/src/bw_processing/matrix_entry.py b/src/bw_processing/matrix_entry.py index 420c8c1..e019d0f 100644 --- a/src/bw_processing/matrix_entry.py +++ b/src/bw_processing/matrix_entry.py @@ -58,6 +58,11 @@ class MatrixEntry: minimum: Lower bound for distribution sampling. maximum: Upper bound for distribution sampling. negative: Whether the underlying value is negative. + rescale: Per-exchange multiplicative factor applied before matrix + insertion. ``1.0`` (the default) leaves the value unchanged. + Stored as a ``rescale_array`` resource (``kind="rescale"``). Note + that the Python ``float`` value is downcast to ``numpy.float32`` + when written to the structured array. """ row: int @@ -71,6 +76,7 @@ class MatrixEntry: minimum: float = math.nan maximum: float = math.nan negative: bool = False + rescale: float = 1.0 def __post_init__(self): if self.uncertainty_type in _NO_UNCERTAINTY_IDS: @@ -100,12 +106,16 @@ class ArrayEntry: cols: 1-D sequence of integer column indices, one per matrix entry. data: 2-D array of shape ``(n_entries, n_scenarios)``. flip: Optional 1-D boolean sequence of length ``n_entries``. + rescale: Optional 1-D float array of per-entry multiplicative factors + (one per row). ``1.0`` leaves the value unchanged. Stored as a + ``rescale_array`` resource (``kind="rescale"``). """ rows: np.ndarray cols: np.ndarray data: np.ndarray flip: Optional[np.ndarray] = None + rescale: Optional[np.ndarray] = None def __post_init__(self): self.rows = np.asarray(self.rows) @@ -134,6 +144,12 @@ def __post_init__(self): raise ValueError( f"`flip` shape {self.flip.shape} doesn't match `rows` shape {self.rows.shape}" ) + if self.rescale is not None: + self.rescale = np.asarray(self.rescale, dtype=np.float32) + if self.rescale.shape != self.rows.shape: + raise ValueError( + f"`rescale` shape {self.rescale.shape} doesn't match `rows` shape {self.rows.shape}" + ) def create_datapackage_from_entries( diff --git a/src/bw_processing/utils.py b/src/bw_processing/utils.py index 08215ac..9a0f6ea 100644 --- a/src/bw_processing/utils.py +++ b/src/bw_processing/utils.py @@ -64,6 +64,7 @@ def dictionary_formatter(row: dict) -> tuple: row.get("maximum", np.nan), row.get("negative", False), row.get("flip", False), + row.get("rescale", 1.0), ) @@ -73,11 +74,13 @@ def resolve_dict_iterator(iterator: Any, nrows: int = None) -> tuple: data = (dictionary_formatter(row) for row in iterator) array = create_structured_array( data, - INDICES_DTYPE + [("amount", np.float32)] + UNCERTAINTY_DTYPE + [("flip", bool)], + INDICES_DTYPE + [("amount", np.float32)] + UNCERTAINTY_DTYPE + [("flip", bool), ("rescale", np.float32)], nrows=nrows, sort=True, sort_fields=sort_fields, ) + rescale = array["rescale"] + rescale_array = rescale if (rescale != 1.0).any() else None return ( array["amount"], # Not repacking fields would cause this multi-field index to return a view @@ -98,6 +101,7 @@ def resolve_dict_iterator(iterator: Any, nrows: int = None) -> tuple: ] ), array["flip"], + rescale_array, ) diff --git a/tests/test_datapackage.py b/tests/test_datapackage.py index f27c11e..f7ead09 100644 --- a/tests/test_datapackage.py +++ b/tests/test_datapackage.py @@ -457,28 +457,28 @@ def test_add_dynamic_vector_flip_shapemistmatch(): ) -def test_add_persistent_vector_scale_array(): +def test_add_persistent_vector_rescale_array(): dp = create_datapackage() data_array = np.array([2.0, 7.0, 12.0]) - scale_array = np.array([0.5, 1.0, 2.0]) + rescale_array = np.array([0.5, 1.0, 2.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) dp.add_persistent_vector( matrix="sa_matrix", data_array=data_array, name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) - assert "sa-data-vector.scale" in [o["name"] for o in dp.resources] - data, meta = dp.get_resource("sa-data-vector.scale") - assert meta["kind"] == "scale" - assert np.allclose(data, scale_array) + assert "sa-data-vector.rescale" in [o["name"] for o in dp.resources] + data, meta = dp.get_resource("sa-data-vector.rescale") + assert meta["kind"] == "rescale" + assert np.allclose(data, rescale_array) def test_add_persistent_vector_scale_dtype(): dp = create_datapackage() data_array = np.array([2.0, 7.0, 12.0]) - scale_array = np.array([1, 2, 3]) # integer dtype + rescale_array = np.array([1, 2, 3]) # integer dtype indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(WrongDatatype): dp.add_persistent_vector( @@ -486,14 +486,14 @@ def test_add_persistent_vector_scale_dtype(): data_array=data_array, name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) def test_add_persistent_vector_scale_shapemismatch(): dp = create_datapackage() data_array = np.array([2.0, 7.0, 12.0]) - scale_array = np.array([0.5, 1.0, 2.0, 3.0]) + rescale_array = np.array([0.5, 1.0, 2.0, 3.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(ShapeMismatch): dp.add_persistent_vector( @@ -501,32 +501,32 @@ def test_add_persistent_vector_scale_shapemismatch(): data_array=data_array, name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) -def test_add_persistent_array_scale_array(): +def test_add_persistent_array_rescale_array(): dp = create_datapackage() data_array = np.arange(12, dtype=float).reshape(3, 4) - scale_array = np.array([0.5, 1.0, 2.0]) + rescale_array = np.array([0.5, 1.0, 2.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) dp.add_persistent_array( matrix="sa_matrix", data_array=data_array, name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) - assert "sa-data-vector.scale" in [o["name"] for o in dp.resources] - data, meta = dp.get_resource("sa-data-vector.scale") - assert meta["kind"] == "scale" - assert np.allclose(data, scale_array) + assert "sa-data-vector.rescale" in [o["name"] for o in dp.resources] + data, meta = dp.get_resource("sa-data-vector.rescale") + assert meta["kind"] == "rescale" + assert np.allclose(data, rescale_array) def test_add_persistent_array_scale_dtype(): dp = create_datapackage() data_array = np.arange(12, dtype=float).reshape(3, 4) - scale_array = np.array([1, 2, 3]) + rescale_array = np.array([1, 2, 3]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(WrongDatatype): dp.add_persistent_array( @@ -534,14 +534,14 @@ def test_add_persistent_array_scale_dtype(): data_array=data_array, name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) def test_add_persistent_array_scale_shapemismatch(): dp = create_datapackage() data_array = np.arange(12, dtype=float).reshape(3, 4) - scale_array = np.array([0.5, 1.0, 2.0, 3.0]) + rescale_array = np.array([0.5, 1.0, 2.0, 3.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(ShapeMismatch): dp.add_persistent_array( @@ -549,30 +549,30 @@ def test_add_persistent_array_scale_shapemismatch(): data_array=data_array, name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) -def test_add_dynamic_vector_scale_array(): +def test_add_dynamic_vector_rescale_array(): dp = create_datapackage() - scale_array = np.array([0.5, 1.0, 2.0]) + rescale_array = np.array([0.5, 1.0, 2.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) dp.add_dynamic_vector( matrix="sa_matrix", interface=Dummy(), name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) - assert "sa-data-vector.scale" in [o["name"] for o in dp.resources] - data, meta = dp.get_resource("sa-data-vector.scale") - assert meta["kind"] == "scale" - assert np.allclose(data, scale_array) + assert "sa-data-vector.rescale" in [o["name"] for o in dp.resources] + data, meta = dp.get_resource("sa-data-vector.rescale") + assert meta["kind"] == "rescale" + assert np.allclose(data, rescale_array) def test_add_dynamic_vector_scale_dtype(): dp = create_datapackage() - scale_array = np.array([1, 2, 3]) + rescale_array = np.array([1, 2, 3]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(WrongDatatype): dp.add_dynamic_vector( @@ -580,13 +580,13 @@ def test_add_dynamic_vector_scale_dtype(): interface=Dummy(), name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) def test_add_dynamic_vector_scale_shapemismatch(): dp = create_datapackage() - scale_array = np.array([0.5, 1.0, 2.0, 3.0]) + rescale_array = np.array([0.5, 1.0, 2.0, 3.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(ShapeMismatch): dp.add_dynamic_vector( @@ -594,30 +594,30 @@ def test_add_dynamic_vector_scale_shapemismatch(): interface=Dummy(), name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) -def test_add_dynamic_array_scale_array(): +def test_add_dynamic_array_rescale_array(): dp = create_datapackage() - scale_array = np.array([0.5, 1.0, 2.0]) + rescale_array = np.array([0.5, 1.0, 2.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) dp.add_dynamic_array( matrix="sa_matrix", interface=Dummy(), name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) - assert "sa-data-vector.scale" in [o["name"] for o in dp.resources] - data, meta = dp.get_resource("sa-data-vector.scale") - assert meta["kind"] == "scale" - assert np.allclose(data, scale_array) + assert "sa-data-vector.rescale" in [o["name"] for o in dp.resources] + data, meta = dp.get_resource("sa-data-vector.rescale") + assert meta["kind"] == "rescale" + assert np.allclose(data, rescale_array) def test_add_dynamic_array_scale_dtype(): dp = create_datapackage() - scale_array = np.array([1, 2, 3]) + rescale_array = np.array([1, 2, 3]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(WrongDatatype): dp.add_dynamic_array( @@ -625,13 +625,13 @@ def test_add_dynamic_array_scale_dtype(): interface=Dummy(), name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) def test_add_dynamic_array_scale_shapemismatch(): dp = create_datapackage() - scale_array = np.array([0.5, 1.0, 2.0, 3.0]) + rescale_array = np.array([0.5, 1.0, 2.0, 3.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) with pytest.raises(ShapeMismatch): dp.add_dynamic_array( @@ -639,7 +639,7 @@ def test_add_dynamic_array_scale_shapemismatch(): interface=Dummy(), name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, ) @@ -782,8 +782,8 @@ def test_finalize_mixed_groups_prunes_only_trivial(tmp_path): assert dist_resources[0]["group"] == "real" -def test_scale_array_parquet_roundtrip(tmp_path): - scale_array = np.array([0.5, 1.0, 2.0]) +def test_rescale_array_parquet_roundtrip(tmp_path): + rescale_array = np.array([0.5, 1.0, 2.0]) indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE) data_array = np.array([100.0, 200.0, 300.0]) @@ -796,16 +796,16 @@ def test_scale_array_parquet_roundtrip(tmp_path): data_array=data_array, name="sa-data-vector", indices_array=indices_array, - scale_array=scale_array, + rescale_array=rescale_array, matrix_serialize_format_type=MatrixSerializeFormat.PARQUET, ) dp.finalize_serialization() dp2 = load_datapackage(generic_directory_filesystem(dirpath=tmp_path)) - assert "sa-data-vector.scale" in [o["name"] for o in dp2.resources] - loaded, meta = dp2.get_resource("sa-data-vector.scale") - assert meta["kind"] == "scale" - assert np.allclose(loaded, scale_array) + assert "sa-data-vector.rescale" in [o["name"] for o in dp2.resources] + loaded, meta = dp2.get_resource("sa-data-vector.rescale") + assert meta["kind"] == "rescale" + assert np.allclose(loaded, rescale_array) def test_simple_graph(): diff --git a/tests/test_matrix_entry.py b/tests/test_matrix_entry.py index 497cfd2..ea04a62 100644 --- a/tests/test_matrix_entry.py +++ b/tests/test_matrix_entry.py @@ -49,6 +49,15 @@ def test_defaults(self): assert math.isnan(e.shape) assert math.isnan(e.minimum) assert math.isnan(e.maximum) + assert e.rescale == pytest.approx(1.0) + + def test_rescale_custom_value(self): + e = MatrixEntry(row=1, col=2, amount=3.0, rescale=0.5) + assert e.rescale == pytest.approx(0.5) + + def test_rescale_in_as_dict(self): + e = MatrixEntry(row=1, col=2, amount=3.0, rescale=2.0) + assert e.as_dict()["rescale"] == pytest.approx(2.0) def test_loc_set_to_amount_for_no_uncertainty(self): e = MatrixEntry(row=1, col=2, amount=5.0) @@ -81,6 +90,7 @@ def test_as_dict_keys(self): assert set(d.keys()) == { "row", "col", "amount", "flip", "uncertainty_type", "loc", "scale", "shape", "minimum", "maximum", "negative", + "rescale", } def test_as_dict_values(self): @@ -228,6 +238,19 @@ def test_flip_shape_mismatch(self): with pytest.raises(ValueError, match="flip.*rows"): ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), flip=[True, False, True]) + def test_rescale_default_is_none(self): + e = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4))) + assert e.rescale is None + + def test_rescale_coerced_to_float32(self): + e = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), rescale=[2.0, 0.5]) + assert e.rescale.dtype == np.float32 + np.testing.assert_array_almost_equal(e.rescale, [2.0, 0.5]) + + def test_rescale_shape_mismatch(self): + with pytest.raises(ValueError, match="rescale.*rows"): + ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), rescale=[1.0, 2.0, 3.0]) + class TestAddArrayEntries: def test_single_entry(self): @@ -285,6 +308,86 @@ def test_no_flip_resource_when_flip_is_none(self): kinds = [r["kind"] for r in group.resources] assert "flip" not in kinds + def test_no_rescale_resource_when_rescale_is_none(self): + dp = create_datapackage() + entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 3))) + dp.add_array_entries(matrix="technosphere_matrix", entries=[entry]) + group = next(iter(dp.groups.values())) + kinds = [r["kind"] for r in group.resources] + assert "rescale" not in kinds + + def test_rescale_stored_correctly(self): + dp = create_datapackage() + entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 3)), rescale=[2.0, 0.5]) + dp.add_array_entries(matrix="technosphere_matrix", entries=[entry]) + group = next(iter(dp.groups.values())) + rescale_resource = next(r for r in group.resources if r["kind"] == "rescale") + stored = dp.data[dp.resources.index(rescale_resource)] + np.testing.assert_array_almost_equal(stored, [2.0, 0.5]) + + +class TestAddEntries: + def test_no_rescale_resource_when_all_rescale_one(self): + dp = create_datapackage() + entries = [ + MatrixEntry(row=1, col=2, amount=1.0), + MatrixEntry(row=3, col=4, amount=2.0), + ] + dp.add_entries(matrix="technosphere_matrix", entries=entries) + group = next(iter(dp.groups.values())) + kinds = [r["kind"] for r in group.resources] + assert "rescale" not in kinds + + def test_rescale_resource_stored_when_rescale_set(self): + dp = create_datapackage() + entries = [ + MatrixEntry(row=1, col=2, amount=1.0, rescale=0.5), + MatrixEntry(row=3, col=4, amount=2.0, rescale=2.0), + ] + dp.add_entries(matrix="technosphere_matrix", entries=entries) + group = next(iter(dp.groups.values())) + rescale_resource = next(r for r in group.resources if r["kind"] == "rescale") + stored = dp.data[dp.resources.index(rescale_resource)] + np.testing.assert_array_almost_equal(sorted(stored), [0.5, 2.0]) + + def test_rescale_resource_written_when_only_some_entries_rescaled(self): + dp = create_datapackage() + entries = [ + MatrixEntry(row=1, col=2, amount=1.0), # rescale=1.0 (default) + MatrixEntry(row=3, col=4, amount=2.0, rescale=0.5), + ] + dp.add_entries(matrix="technosphere_matrix", entries=entries) + group = next(iter(dp.groups.values())) + idx_resource = next(r for r in group.resources if r["kind"] == "indices") + rescale_resource = next(r for r in group.resources if r["kind"] == "rescale") + indices = dp.data[dp.resources.index(idx_resource)] + rescales = dp.data[dp.resources.index(rescale_resource)] + for i, idx in enumerate(indices): + if idx["row"] == 1: + assert rescales[i] == pytest.approx(1.0) + else: + assert rescales[i] == pytest.approx(0.5) + + def test_rescale_sorted_with_data(self): + dp = create_datapackage() + entries = [ + MatrixEntry(row=3, col=4, amount=2.0, rescale=2.0), + MatrixEntry(row=1, col=2, amount=1.0, rescale=0.5), + ] + dp.add_entries(matrix="technosphere_matrix", entries=entries) + group = next(iter(dp.groups.values())) + + idx_resource = next(r for r in group.resources if r["kind"] == "indices") + rescale_resource = next(r for r in group.resources if r["kind"] == "rescale") + indices = dp.data[dp.resources.index(idx_resource)] + rescales = dp.data[dp.resources.index(rescale_resource)] + + for i, idx in enumerate(indices): + if idx["row"] == 1: + assert rescales[i] == pytest.approx(0.5) + else: + assert rescales[i] == pytest.approx(2.0) + class TestSimpleGraphDeprecation: def test_deprecation_warning(self): diff --git a/tests/test_utils.py b/tests/test_utils.py index 99841a1..f26dade 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -37,7 +37,7 @@ def test_dictionary_formatter_sparse(): result = dictionary_formatter(given) assert result[:5] == (1, 1, 4, 0, 4) assert all(np.isnan(x) for x in result[5:9]) - assert result[9:] == (False, False) + assert result[9:] == (False, False, 1.0) def test_dictionary_formatter_uncertainty_type(): @@ -68,7 +68,7 @@ def test_dictionary_formatter_complete(): "negative": True, "flip": False, } - expected = (1, 2, 3, 4, 5, 6, 7, 8, 9, True, False) + expected = (1, 2, 3, 4, 5, 6, 7, 8, 9, True, False, 1.0) assert dictionary_formatter(given) == expected @@ -85,10 +85,22 @@ def test_dictionary_formatter_one_dimensional(): "negative": True, "flip": False, } - expected = (1, 1, 3, 4, 5, 6, 7, 8, 9, True, False) + expected = (1, 1, 3, 4, 5, 6, 7, 8, 9, True, False, 1.0) assert dictionary_formatter(given) == expected +def test_dictionary_formatter_rescale(): + given = {"row": 1, "col": 2, "amount": 3.0, "rescale": 0.5} + result = dictionary_formatter(given) + assert result[-1] == 0.5 + + +def test_dictionary_formatter_rescale_default(): + given = {"row": 1, "amount": 4} + result = dictionary_formatter(given) + assert result[-1] == 1.0 + + def test_check_suffix(): assert check_suffix("foo.bar.baz", "baz") == "foo.bar.baz" assert check_suffix("foo.bar.baz", ".baz") == "foo.bar.baz"