Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/bw_processing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"generic_zipfile_filesystem",
"INDICES_DTYPE",
"load_datapackage",
"ArrayEntry",
"MatrixEntry",
"MatrixName",
"MatrixSerializeFormat",
Expand Down Expand Up @@ -58,7 +59,7 @@
from bw_processing.filesystem import clean_datapackage_name, md5, safe_filename
from bw_processing.indexing import reindex, reset_index
from bw_processing.io_helpers import generic_directory_filesystem, generic_zipfile_filesystem
from bw_processing.matrix_entry import MatrixEntry, MatrixName, create_datapackage_from_entries
from bw_processing.matrix_entry import ArrayEntry, MatrixEntry, MatrixName, create_datapackage_from_entries
from bw_processing.merging import merge_datapackages_with_mask
from bw_processing.proxies import UndefinedInterface
from bw_processing.unique_fields import as_unique_attributes, as_unique_attributes_dataframe
28 changes: 27 additions & 1 deletion src/bw_processing/datapackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

if TYPE_CHECKING:
from bw_processing.matrix_entry import MatrixEntry
from bw_processing.matrix_entry import ArrayEntry, MatrixEntry

try:
from stats_arrays import NoUncertainty, UndefinedUncertainty
Expand Down Expand Up @@ -548,6 +548,32 @@ def add_entries(
nrows=len(entries),
)

def add_array_entries(
    self,
    *,
    matrix: str,
    entries: list["ArrayEntry"],
) -> None:
    """Register every :class:`.ArrayEntry` in ``entries`` as a persistent array.

    For each entry, a structured index array with dtype ``INDICES_DTYPE`` is
    assembled from the entry's ``rows`` and ``cols`` and handed, together
    with the entry's ``data`` and ``flip`` arrays, to
    :meth:`add_persistent_array`. That method is called once per entry and
    no resource name is passed, so naming is left to
    :meth:`add_persistent_array`.

    Args:
        matrix: Name of the target matrix (e.g. ``"technosphere"``).
        entries: List of :class:`.ArrayEntry` instances.
    """
    for array_entry in entries:
        n_entries = len(array_entry.rows)
        # Structured array; both fields are filled below, so the
        # uninitialized memory returned by ``np.empty`` is never exposed.
        index_array = np.empty(n_entries, dtype=INDICES_DTYPE)
        index_array["col"] = array_entry.cols
        index_array["row"] = array_entry.rows
        self.add_persistent_array(
            matrix=matrix,
            data_array=array_entry.data,
            indices_array=index_array,
            flip_array=array_entry.flip,
        )

def add_persistent_vector(
self,
*, # Forces use of keyword arguments
Expand Down
53 changes: 53 additions & 0 deletions src/bw_processing/matrix_entry.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import dataclasses
import math
from enum import Enum
from typing import Optional

import numpy as np

try:
from stats_arrays import NoUncertainty, UndefinedUncertainty
Expand Down Expand Up @@ -83,6 +86,56 @@ def as_dict(self) -> dict:
return dataclasses.asdict(self)


@dataclasses.dataclass
class ArrayEntry:
    """All index/flip metadata for one persistent-array resource group.

    Unlike :class:`MatrixEntry`, which represents a single row, ``ArrayEntry``
    holds every row of a resource group together so that the 2-D scenario
    ``data`` array can be supplied directly without decomposing and
    reassembling it.

    All inputs are normalized with :func:`numpy.asarray` and validated on
    construction, so after ``__init__`` every populated field is an
    ``ndarray``.

    Two entries compare equal when they are of the same class, agree on
    whether ``flip`` is set, and all of their arrays are equal according to
    :func:`numpy.array_equal` (same shape, same elements; ``NaN`` values
    therefore never compare equal). Instances are not hashable.

    Args:
        rows: 1-D sequence of integer row indices, one per matrix entry.
        cols: 1-D sequence of integer column indices, one per matrix entry.
        data: 2-D array of shape ``(n_entries, n_scenarios)``.
        flip: Optional 1-D boolean sequence of length ``n_entries``.

    Raises:
        ValueError: If ``rows`` is not 1-D, if ``rows`` or ``cols`` do not
            have an integer dtype, if ``cols`` or ``flip`` do not match the
            shape of ``rows``, if ``data`` is not 2-D, or if the first
            dimension of ``data`` differs from ``len(rows)``.
    """

    rows: np.ndarray
    cols: np.ndarray
    data: np.ndarray
    flip: Optional[np.ndarray] = None

    def __post_init__(self) -> None:
        """Coerce all fields to ``ndarray`` and validate shapes and dtypes."""
        self.rows = np.asarray(self.rows)
        self.cols = np.asarray(self.cols)
        self.data = np.asarray(self.data)

        if self.rows.ndim != 1:
            raise ValueError(f"`rows` must be 1-D, got shape {self.rows.shape}")
        if not np.issubdtype(self.rows.dtype, np.integer):
            raise ValueError(f"`rows` must have integer dtype, got {self.rows.dtype}")
        if self.cols.shape != self.rows.shape:
            raise ValueError(
                f"`cols` shape {self.cols.shape} doesn't match `rows` shape {self.rows.shape}"
            )
        if not np.issubdtype(self.cols.dtype, np.integer):
            raise ValueError(f"`cols` must have integer dtype, got {self.cols.dtype}")
        if self.data.ndim != 2:
            raise ValueError(f"`data` must be 2-D, got {self.data.ndim}-D")
        if self.data.shape[0] != len(self.rows):
            raise ValueError(
                f"`data` has {self.data.shape[0]} rows but `rows` has {len(self.rows)} entries"
            )
        if self.flip is not None:
            self.flip = np.asarray(self.flip, dtype=bool)
            if self.flip.shape != self.rows.shape:
                raise ValueError(
                    f"`flip` shape {self.flip.shape} doesn't match `rows` shape {self.rows.shape}"
                )

    def __eq__(self, other):
        """Compare two entries array-by-array.

        The dataclass-generated ``__eq__`` compares the fields as a tuple,
        which asks for the truth value of ``ndarray == ndarray`` and raises
        ``ValueError`` for arrays with more than one element. Defining
        ``__eq__`` explicitly keeps ``==`` usable; ``dataclass`` does not
        overwrite a method that is already defined on the class.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        # ``flip`` is optional: entries differ if only one of them sets it.
        if (self.flip is None) != (other.flip is None):
            return False
        if self.flip is not None and not np.array_equal(self.flip, other.flip):
            return False
        return bool(
            np.array_equal(self.rows, other.rows)
            and np.array_equal(self.cols, other.cols)
            and np.array_equal(self.data, other.data)
        )


def create_datapackage_from_entries(
data: dict,
fs=None,
Expand Down
120 changes: 119 additions & 1 deletion tests/test_matrix_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import pytest

from bw_processing import (
ArrayEntry,
MatrixEntry,
MatrixName,
create_datapackage,
create_datapackage_from_entries,
simple_graph,
)
from bw_processing.constants import UNCERTAINTY_DTYPE
from bw_processing.constants import INDICES_DTYPE, UNCERTAINTY_DTYPE


class TestMatrixName:
Expand Down Expand Up @@ -168,6 +170,122 @@ def test_metadata_passed_through(self):
)
assert dp.metadata["name"] == "my-package"

class TestArrayEntry:
    """Construction, normalization and validation checks for ``ArrayEntry``."""

    # --- successful construction -------------------------------------

    def test_basic_construction(self):
        entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)))
        assert entry.rows.tolist() == [0, 1]
        assert entry.cols.tolist() == [2, 3]
        assert entry.data.shape == (2, 4)
        assert entry.flip is None

    def test_with_flip(self):
        entry = ArrayEntry(
            rows=[0, 1],
            cols=[2, 3],
            data=np.ones((2, 4)),
            flip=[True, False],
        )
        assert entry.flip.tolist() == [True, False]

    def test_numpy_inputs(self):
        entry = ArrayEntry(
            rows=np.array([0, 1, 2]),
            cols=np.array([3, 4, 5]),
            data=np.ones((3, 10)),
        )
        assert entry.data.shape == (3, 10)

    def test_fields_are_normalized_to_ndarray(self):
        entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)))
        for field_value in (entry.rows, entry.cols, entry.data):
            assert isinstance(field_value, np.ndarray)

    def test_flip_coerced_to_bool(self):
        # Integer flags are accepted and converted to a boolean array.
        entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), flip=[1, 0])
        assert entry.flip.dtype == bool
        assert entry.flip.tolist() == [True, False]

    # --- validation failures -----------------------------------------

    def test_rows_must_be_1d(self):
        nested_rows = [[0, 1], [2, 3]]
        with pytest.raises(ValueError, match="1-D"):
            ArrayEntry(rows=nested_rows, cols=[0, 1, 2, 3], data=np.ones((4, 2)))

    def test_rows_must_be_integer_dtype(self):
        float_rows = np.array([1.7, 2.9])
        with pytest.raises(ValueError, match="integer dtype"):
            ArrayEntry(rows=float_rows, cols=np.array([3, 4]), data=np.ones((2, 3)))

    def test_cols_must_be_integer_dtype(self):
        float_cols = np.array([3.0, 4.0])
        with pytest.raises(ValueError, match="integer dtype"):
            ArrayEntry(rows=np.array([1, 2]), cols=float_cols, data=np.ones((2, 3)))

    def test_cols_shape_mismatch(self):
        with pytest.raises(ValueError, match="cols.*rows"):
            ArrayEntry(rows=[0, 1], cols=[0, 1, 2], data=np.ones((2, 3)))

    def test_data_must_be_2d(self):
        flat_data = np.ones(2)
        with pytest.raises(ValueError, match="2-D"):
            ArrayEntry(rows=[0, 1], cols=[2, 3], data=flat_data)

    def test_data_row_count_mismatch(self):
        # Three data rows for two index pairs.
        with pytest.raises(ValueError, match="data.*rows"):
            ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((3, 4)))

    def test_flip_shape_mismatch(self):
        too_long_flip = [True, False, True]
        with pytest.raises(ValueError, match="flip.*rows"):
            ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 4)), flip=too_long_flip)


class TestAddArrayEntries:
    """Checks that ``add_array_entries`` stores ``ArrayEntry`` contents in a datapackage."""

    @staticmethod
    def _first_group(dp):
        """Return the first group reported by ``dp.groups``."""
        return next(iter(dp.groups.values()))

    @classmethod
    def _stored_array(cls, dp, kind):
        """Return the stored array for the first group's resource of ``kind``.

        Looks the resource up by its ``"kind"`` entry and uses its position
        in ``dp.resources`` to index ``dp.data``.
        """
        group = cls._first_group(dp)
        resource = next(r for r in group.resources if r["kind"] == kind)
        return dp.data[dp.resources.index(resource)]

    def test_single_entry(self):
        dp = create_datapackage()
        data = np.array([[1.0, 2.0], [3.0, 4.0]])
        entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=data)
        dp.add_array_entries(matrix="technosphere_matrix", entries=[entry])
        assert len(dp.groups) == 1

    def test_indices_stored_correctly(self):
        dp = create_datapackage()
        data = np.ones((2, 3))
        entry = ArrayEntry(rows=[5, 6], cols=[7, 8], data=data)
        dp.add_array_entries(matrix="technosphere_matrix", entries=[entry])
        idx = self._stored_array(dp, "indices")
        assert idx.dtype == np.dtype(INDICES_DTYPE)
        assert list(idx["row"]) == [5, 6]
        assert list(idx["col"]) == [7, 8]

    def test_data_stored_correctly(self):
        dp = create_datapackage()
        data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=data)
        dp.add_array_entries(matrix="technosphere_matrix", entries=[entry])
        stored = self._stored_array(dp, "data")
        np.testing.assert_array_equal(stored, data)

    def test_flip_stored(self):
        dp = create_datapackage()
        data = np.ones((2, 3))
        entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=data, flip=[True, False])
        dp.add_array_entries(matrix="technosphere_matrix", entries=[entry])
        flip = self._stored_array(dp, "flip")
        # Compare dtype and values rather than object identity: `x is np.bool_(True)`
        # only holds because NumPy happens to reuse its boolean scalar objects.
        assert flip.dtype == np.bool_
        np.testing.assert_array_equal(flip, [True, False])

    def test_multiple_entries_create_multiple_groups(self):
        dp = create_datapackage()
        e1 = ArrayEntry(rows=[0], cols=[1], data=np.ones((1, 2)))
        e2 = ArrayEntry(rows=[2], cols=[3], data=np.ones((1, 5)))
        dp.add_array_entries(matrix="technosphere_matrix", entries=[e1, e2])
        assert len(dp.groups) == 2

    def test_no_flip_resource_when_flip_is_none(self):
        dp = create_datapackage()
        entry = ArrayEntry(rows=[0, 1], cols=[2, 3], data=np.ones((2, 3)))
        dp.add_array_entries(matrix="technosphere_matrix", entries=[entry])
        kinds = [r["kind"] for r in self._first_group(dp).resources]
        assert "flip" not in kinds


class TestSimpleGraphDeprecation:
def test_deprecation_warning(self):
with warnings.catch_warnings(record=True) as w:
Expand Down
Loading