diff --git a/src/bw_processing/__init__.py b/src/bw_processing/__init__.py index 06c3fb7..e36ffff 100644 --- a/src/bw_processing/__init__.py +++ b/src/bw_processing/__init__.py @@ -17,6 +17,7 @@ "generic_zipfile_filesystem", "INDICES_DTYPE", "load_datapackage", + "ArrayEntry", "MatrixEntry", "MatrixName", "MatrixSerializeFormat", @@ -58,7 +59,7 @@ from bw_processing.filesystem import clean_datapackage_name, md5, safe_filename from bw_processing.indexing import reindex, reset_index from bw_processing.io_helpers import generic_directory_filesystem, generic_zipfile_filesystem -from bw_processing.matrix_entry import MatrixEntry, MatrixName, create_datapackage_from_entries +from bw_processing.matrix_entry import ArrayEntry, MatrixEntry, MatrixName, create_datapackage_from_entries from bw_processing.merging import merge_datapackages_with_mask from bw_processing.proxies import UndefinedInterface from bw_processing.unique_fields import as_unique_attributes, as_unique_attributes_dataframe diff --git a/src/bw_processing/datapackage.py b/src/bw_processing/datapackage.py index 240d207..8d3ca1e 100644 --- a/src/bw_processing/datapackage.py +++ b/src/bw_processing/datapackage.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union if TYPE_CHECKING: - from bw_processing.matrix_entry import MatrixEntry + from bw_processing.matrix_entry import ArrayEntry, MatrixEntry try: from stats_arrays import NoUncertainty, UndefinedUncertainty @@ -548,6 +548,32 @@ def add_entries( nrows=len(entries), ) + def add_array_entries( + self, + *, + matrix: str, + entries: list["ArrayEntry"], + ) -> None: + """Add matrix data from a list of :class:`.ArrayEntry` objects. + + Each :class:`.ArrayEntry` becomes one persistent-array resource group. + Resource group names are auto-generated. + + Args: + matrix: Name of the target matrix (e.g. ``"technosphere"``). + entries: List of :class:`.ArrayEntry` instances. + """ + for entry in entries: + indices = np.empty(len(entry.rows), dtype=INDICES_DTYPE) + indices["row"] = entry.rows + indices["col"] = entry.cols + self.add_persistent_array( + matrix=matrix, + indices_array=indices, + data_array=entry.data, + flip_array=entry.flip, + ) + def add_persistent_vector( self, *, # Forces use of keyword arguments diff --git a/src/bw_processing/matrix_entry.py b/src/bw_processing/matrix_entry.py index 37288a9..420c8c1 100644 --- a/src/bw_processing/matrix_entry.py +++ b/src/bw_processing/matrix_entry.py @@ -1,6 +1,9 @@ import dataclasses import math from enum import Enum +from typing import Optional + +import numpy as np try: from stats_arrays import NoUncertainty, UndefinedUncertainty @@ -83,6 +86,56 @@ def as_dict(self) -> dict: return dataclasses.asdict(self) +@dataclasses.dataclass +class ArrayEntry: + """All index/flip metadata for one persistent-array resource group. + + Unlike :class:`MatrixEntry`, which represents a single row, ``ArrayEntry`` + holds every row of a resource group together so that the 2-D scenario + ``data`` array can be supplied directly without decomposing and + reassembling it. + + Args: + rows: 1-D sequence of integer row indices, one per matrix entry. + cols: 1-D sequence of integer column indices, one per matrix entry. + data: 2-D array of shape ``(n_entries, n_scenarios)``. + flip: Optional 1-D boolean sequence of length ``n_entries``. + """ + + rows: np.ndarray + cols: np.ndarray + data: np.ndarray + flip: Optional[np.ndarray] = None + + def __post_init__(self): + self.rows = np.asarray(self.rows) + self.cols = np.asarray(self.cols) + self.data = np.asarray(self.data) + + if self.rows.ndim != 1: + raise ValueError(f"`rows` must be 1-D, got shape {self.rows.shape}") + if not np.issubdtype(self.rows.dtype, np.integer): + raise ValueError(f"`rows` must have integer dtype, got {self.rows.dtype}") + if self.cols.shape != self.rows.shape: + raise ValueError( + f"`cols` shape {self.cols.shape} doesn't match `rows` shape {self.rows.shape}" + ) + if not np.issubdtype(self.cols.dtype, np.integer): + raise ValueError(f"`cols` must have integer dtype, got {self.cols.dtype}") + if self.data.ndim != 2: + raise ValueError(f"`data` must be 2-D, got {self.data.ndim}-D") + if self.data.shape[0] != len(self.rows): + raise ValueError( + f"`data` has {self.data.shape[0]} rows but `rows` has {len(self.rows)} entries" + ) + if self.flip is not None: + self.flip = np.asarray(self.flip, dtype=bool) + if self.flip.shape != self.rows.shape: + raise ValueError( + f"`flip` shape {self.flip.shape} doesn't match `rows` shape {self.rows.shape}" + ) + + def create_datapackage_from_entries( data: dict, fs=None, diff --git a/tests/test_matrix_entry.py b/tests/test_matrix_entry.py index d4a1c91..497cfd2 100644 --- a/tests/test_matrix_entry.py +++ b/tests/test_matrix_entry.py @@ -5,12 +5,14 @@ import pytest from bw_processing import ( + ArrayEntry, MatrixEntry, MatrixName, + create_datapackage, create_datapackage_from_entries, simple_graph, ) -from bw_processing.constants import UNCERTAINTY_DTYPE +from bw_processing.constants import INDICES_DTYPE, UNCERTAINTY_DTYPE class TestMatrixName: @@ -168,6 +170,122 @@ def test_metadata_passed_through(self): ) assert dp.metadata["name"] == "my-package" +class TestArrayEntry: + def test_basic_construction(self): + e = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4))) + assert list(e.rows) == [0, 1] + assert list(e.cols) == [2, 3] + assert e.data.shape == (2, 4) + assert e.flip is None + + def test_with_flip(self): + e = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), flip=[True, False]) + assert list(e.flip) == [True, False] + + def test_numpy_inputs(self): + rows = np.array([0, 1, 2]) + cols = np.array([3, 4, 5]) + data = np.ones((3, 10)) + e = ArrayEntry(rows=rows, cols=cols, data=data) + assert e.data.shape == (3, 10) + + def test_fields_are_normalized_to_ndarray(self): + e = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4))) + assert isinstance(e.rows, np.ndarray) + assert isinstance(e.cols, np.ndarray) + assert isinstance(e.data, np.ndarray) + + def test_flip_coerced_to_bool(self): + e = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), flip=[1, 0]) + assert e.flip.dtype == bool + assert list(e.flip) == [True, False] + + def test_rows_must_be_1d(self): + with pytest.raises(ValueError, match="1-D"): + ArrayEntry(rows=[[0, 1], [2, 3]], cols=[0, 1, 2, 3], data=np.ones((4, 2))) + + def test_rows_must_be_integer_dtype(self): + with pytest.raises(ValueError, match="integer dtype"): + ArrayEntry(rows=np.array([1.7, 2.9]), cols=np.array([3, 4]), data=np.ones((2, 3))) + + def test_cols_must_be_integer_dtype(self): + with pytest.raises(ValueError, match="integer dtype"): + ArrayEntry(rows=np.array([1, 2]), cols=np.array([3.0, 4.0]), data=np.ones((2, 3))) + + def test_cols_shape_mismatch(self): + with pytest.raises(ValueError, match="cols.*rows"): + ArrayEntry(rows=[0, 1], cols=[0, 1, 2], data=np.ones((2, 3))) + + def test_data_must_be_2d(self): + with pytest.raises(ValueError, match="2-D"): + ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones(2)) + + def test_data_row_count_mismatch(self): + with pytest.raises(ValueError, match="data.*rows"): + ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((3, 4))) + + def test_flip_shape_mismatch(self): + with pytest.raises(ValueError, match="flip.*rows"): + ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), flip=[True, False, True]) + + +class TestAddArrayEntries: + def test_single_entry(self): + dp = create_datapackage() + data = np.array([[1.0, 2.0], [3.0, 4.0]]) + entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=data) + dp.add_array_entries(matrix="technosphere_matrix", entries=[entry]) + assert len(dp.groups) == 1 + + def test_indices_stored_correctly(self): + dp = create_datapackage() + data = np.ones((2, 3)) + entry = ArrayEntry(rows=[5, 6], cols=[7, 8], data=data) + dp.add_array_entries(matrix="technosphere_matrix", entries=[entry]) + group = next(iter(dp.groups.values())) + idx_resource = next(r for r in group.resources if r["kind"] == "indices") + idx = dp.data[dp.resources.index(idx_resource)] + assert idx.dtype == np.dtype(INDICES_DTYPE) + assert list(idx["row"]) == [5, 6] + assert list(idx["col"]) == [7, 8] + + def test_data_stored_correctly(self): + dp = create_datapackage() + data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=data) + dp.add_array_entries(matrix="technosphere_matrix", entries=[entry]) + group = next(iter(dp.groups.values())) + data_resource = next(r for r in group.resources if r["kind"] == "data") + stored = dp.data[dp.resources.index(data_resource)] + np.testing.assert_array_equal(stored, data) + + def test_flip_stored(self): + dp = create_datapackage() + data = np.ones((2, 3)) + entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=data, flip=[True, False]) + dp.add_array_entries(matrix="technosphere_matrix", entries=[entry]) + group = next(iter(dp.groups.values())) + flip_resource = next(r for r in group.resources if r["kind"] == "flip") + flip = dp.data[dp.resources.index(flip_resource)] + assert flip[0] is np.bool_(True) + assert flip[1] is np.bool_(False) + + def test_multiple_entries_create_multiple_groups(self): + dp = create_datapackage() + e1 = ArrayEntry(rows=[0], cols=[1], data=np.ones((1, 2))) + e2 = ArrayEntry(rows=[2], cols=[3], data=np.ones((1, 5))) + dp.add_array_entries(matrix="technosphere_matrix", entries=[e1, e2]) + assert len(dp.groups) == 2 + + def test_no_flip_resource_when_flip_is_none(self): + dp = create_datapackage() + entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 3))) + dp.add_array_entries(matrix="technosphere_matrix", entries=[entry]) + group = next(iter(dp.groups.values())) + kinds = [r["kind"] for r in group.resources] + assert "flip" not in kinds + + class TestSimpleGraphDeprecation: def test_deprecation_warning(self): with warnings.catch_warnings(record=True) as w: