Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 41 additions & 36 deletions src/bw_processing/datapackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ def add_persistent_vector_from_iterator(
indices_array,
distributions_array,
flip_array,
rescale_array,
) = resolve_dict_iterator(dict_iterator, nrows)
self.add_persistent_vector(
matrix=matrix,
Expand All @@ -520,6 +521,7 @@ def add_persistent_vector_from_iterator(
indices_array=indices_array,
flip_array=flip_array,
distributions_array=distributions_array,
rescale_array=rescale_array,
matrix_serialize_format_type=matrix_serialize_format_type,
**kwargs,
)
Expand All @@ -534,7 +536,8 @@ def add_entries(
"""Add matrix data from a list of :class:`MatrixEntry` objects.

High-level convenience method that does not require working directly
with NumPy arrays.
with NumPy arrays. If any entry has a ``rescale`` value other than
``1.0``, the rescale values are stored as a ``rescale_array`` resource.

Args:
matrix: Name of the target matrix (e.g. ``"technosphere"``).
Expand All @@ -557,7 +560,8 @@ def add_array_entries(
"""Add matrix data from a list of :class:`.ArrayEntry` objects.

Each :class:`.ArrayEntry` becomes one persistent-array resource group.
Resource group names are auto-generated.
Resource group names are auto-generated. If an entry has a ``rescale``
array it is stored as a ``rescale_array`` resource (``kind="rescale"``).

Args:
matrix: Name of the target matrix (e.g. ``"technosphere"``).
Expand All @@ -572,6 +576,7 @@ def add_array_entries(
indices_array=indices,
data_array=entry.data,
flip_array=entry.flip,
rescale_array=entry.rescale,
)

def add_persistent_vector(
Expand All @@ -583,7 +588,7 @@ def add_persistent_vector(
data_array: Optional[np.ndarray] = None,
flip_array: Optional[np.ndarray] = None,
distributions_array: Optional[np.ndarray] = None,
scale_array: Optional[np.ndarray] = None,
rescale_array: Optional[np.ndarray] = None,
params_array: Optional[np.ndarray] = None,
param_labels: Optional[list] = None,
param_label_schema: Optional[AnyLabelSchema] = None,
Expand All @@ -593,7 +598,7 @@ def add_persistent_vector(
) -> None:
"""Add a persistent vector resource group to the datapackage.

``scale_array`` is an optional 1-D float array of the same length as
``rescale_array`` is an optional 1-D float array of the same length as
``indices_array``. Each element is a multiplicative factor applied to
the corresponding data value — whether static or stochastic — before
the value is inserted into the matrix. Typical uses are allocation
Expand Down Expand Up @@ -699,9 +704,9 @@ def add_persistent_vector(
meta_type="generic",
**kwargs,
)
if scale_array is not None:
self._add_scale_array_resource(
scale_array=scale_array,
if rescale_array is not None:
self._add_rescale_array_resource(
rescale_array=rescale_array,
indices_array=indices_array,
name=name,
keep_proxy=keep_proxy,
Expand Down Expand Up @@ -745,7 +750,7 @@ def add_persistent_array(
indices_array: np.ndarray,
name: Optional[str] = None,
flip_array: Optional[np.ndarray] = None,
scale_array: Optional[np.ndarray] = None,
rescale_array: Optional[np.ndarray] = None,
params_array: Optional[np.ndarray] = None,
param_labels: Optional[list] = None,
param_label_schema: Optional[AnyLabelSchema] = None,
Expand All @@ -755,7 +760,7 @@ def add_persistent_array(
) -> None:
"""Add a persistent array resource group to the datapackage.

``scale_array`` is an optional 1-D float array of the same length as
``rescale_array`` is an optional 1-D float array of the same length as
``indices_array``. Each element is a multiplicative factor applied to
the corresponding data value — whether static or stochastic — before
the value is inserted into the matrix. Typical uses are allocation
Expand Down Expand Up @@ -835,9 +840,9 @@ def add_persistent_array(
meta_type="generic",
**kwargs,
)
if scale_array is not None:
self._add_scale_array_resource(
scale_array=scale_array,
if rescale_array is not None:
self._add_rescale_array_resource(
rescale_array=rescale_array,
indices_array=indices_array,
name=name,
keep_proxy=keep_proxy,
Expand Down Expand Up @@ -906,7 +911,7 @@ def write_modified(self):
if kind == "indices":
meta_object = "vector"
meta_type = "indices"
elif kind in ("flip", "scale", "params"):
elif kind in ("flip", "rescale", "params"):
meta_object = "vector"
meta_type = "generic"
elif kind == "distributions":
Expand Down Expand Up @@ -947,32 +952,32 @@ def write_modified(self):

self._modified = set()

def _add_scale_array_resource(
def _add_rescale_array_resource(
self,
*,
scale_array: np.ndarray,
rescale_array: np.ndarray,
indices_array: np.ndarray,
name: str,
keep_proxy: bool,
matrix_serialize_format_type: Optional[MatrixSerializeFormat],
**kwargs,
) -> None:
scale_array = load_bytes(scale_array)
if not np.issubdtype(scale_array.dtype, np.floating):
rescale_array = load_bytes(rescale_array)
if not np.issubdtype(rescale_array.dtype, np.floating):
raise WrongDatatype(
"`scale_array` dtype is {}, but must be a float dtype".format(scale_array.dtype)
"`rescale_array` dtype is {}, but must be a float dtype".format(rescale_array.dtype)
)
elif scale_array.shape != indices_array.shape:
elif rescale_array.shape != indices_array.shape:
raise ShapeMismatch(
"`scale_array` shape ({}) doesn't match `indices_array` ({}).".format(
scale_array.shape, indices_array.shape
"`rescale_array` shape ({}) doesn't match `indices_array` ({}).".format(
rescale_array.shape, indices_array.shape
)
)
self._add_numpy_array_resource(
array=scale_array,
array=rescale_array,
group=name,
name=name + ".scale",
kind="scale",
name=name + ".rescale",
kind="rescale",
keep_proxy=keep_proxy,
matrix_serialize_format_type=matrix_serialize_format_type,
meta_object="vector",
Expand Down Expand Up @@ -1126,7 +1131,7 @@ def add_dynamic_vector(
indices_array: np.ndarray, # Not interface
name: Optional[str] = None,
flip_array: Optional[np.ndarray] = None, # Not interface
scale_array: Optional[np.ndarray] = None, # Not interface
rescale_array: Optional[np.ndarray] = None, # Not interface
params_array: Optional[np.ndarray] = None, # Not interface
param_labels: Optional[list] = None,
param_label_schema: Optional[AnyLabelSchema] = None,
Expand All @@ -1140,7 +1145,7 @@ def add_dynamic_vector(
stored on disk. ``interface`` must implement ``__next__()`` and return a
1-D numpy array of length ``len(indices_array)`` each time it is called.

The ``indices_array``, optional ``flip_array``, optional ``scale_array``,
The ``indices_array``, optional ``flip_array``, optional ``rescale_array``,
and optional ``params_array`` are static and are stored as normal numpy
resources. See ``add_persistent_vector`` for documentation of the
``params_array``, ``param_labels``, and ``param_label_schema`` arguments.
Expand All @@ -1154,7 +1159,7 @@ def add_dynamic_vector(
name: Optional resource group name; auto-generated if omitted.
flip_array: Optional boolean array; where ``True`` the value is
multiplied by ``-1`` before insertion.
scale_array: Optional 1-D float array of multiplicative factors
rescale_array: Optional 1-D float array of multiplicative factors
applied before matrix insertion.
keep_proxy: If ``True``, store a proxy rather than the raw array
for on-disk resources.
Expand Down Expand Up @@ -1203,9 +1208,9 @@ def add_dynamic_vector(
meta_type="generic",
**kwargs,
)
if scale_array is not None:
self._add_scale_array_resource(
scale_array=scale_array,
if rescale_array is not None:
self._add_rescale_array_resource(
rescale_array=rescale_array,
indices_array=indices_array,
name=name,
keep_proxy=keep_proxy,
Expand Down Expand Up @@ -1259,7 +1264,7 @@ def add_dynamic_array(
indices_array: np.ndarray, # Not interface
name: Optional[str] = None,
flip_array: Optional[np.ndarray] = None,
scale_array: Optional[np.ndarray] = None, # Not interface
rescale_array: Optional[np.ndarray] = None, # Not interface
params_array: Optional[np.ndarray] = None, # Not interface
param_labels: Optional[list] = None,
param_label_schema: Optional[AnyLabelSchema] = None,
Expand All @@ -1275,7 +1280,7 @@ def add_dynamic_array(
array for ``args[1]``. ``ncols`` may be ``None`` for an infinite
interface.

The ``indices_array``, optional ``flip_array``, optional ``scale_array``,
The ``indices_array``, optional ``flip_array``, optional ``rescale_array``,
and optional ``params_array`` are static and are stored as normal numpy
resources. For dynamic arrays the column count of ``params_array`` is
not validated against the interface (whose column count may be unknown at
Expand All @@ -1291,7 +1296,7 @@ def add_dynamic_array(
name: Optional resource group name; auto-generated if omitted.
flip_array: Optional boolean array; where ``True`` the value is
multiplied by ``-1`` before insertion.
scale_array: Optional 1-D float array of multiplicative factors
rescale_array: Optional 1-D float array of multiplicative factors
applied before matrix insertion.
keep_proxy: If ``True``, store a proxy rather than the raw array
for on-disk resources.
Expand Down Expand Up @@ -1343,9 +1348,9 @@ def add_dynamic_array(
meta_type="generic",
**kwargs,
)
if scale_array is not None:
self._add_scale_array_resource(
scale_array=scale_array,
if rescale_array is not None:
self._add_rescale_array_resource(
rescale_array=rescale_array,
indices_array=indices_array,
name=name,
keep_proxy=keep_proxy,
Expand Down
16 changes: 16 additions & 0 deletions src/bw_processing/matrix_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ class MatrixEntry:
minimum: Lower bound for distribution sampling.
maximum: Upper bound for distribution sampling.
negative: Whether the underlying value is negative.
rescale: Per-exchange multiplicative factor applied before matrix
insertion. ``1.0`` (the default) leaves the value unchanged.
Stored as a ``rescale_array`` resource (``kind="rescale"``). Note
that the Python ``float`` value is downcast to ``numpy.float32``
when written to the structured array.
"""

row: int
Expand All @@ -71,6 +76,7 @@ class MatrixEntry:
minimum: float = math.nan
maximum: float = math.nan
negative: bool = False
rescale: float = 1.0

def __post_init__(self):
if self.uncertainty_type in _NO_UNCERTAINTY_IDS:
Expand Down Expand Up @@ -100,12 +106,16 @@ class ArrayEntry:
cols: 1-D sequence of integer column indices, one per matrix entry.
data: 2-D array of shape ``(n_entries, n_scenarios)``.
flip: Optional 1-D boolean sequence of length ``n_entries``.
rescale: Optional 1-D float array of per-entry multiplicative factors
(one per row). ``1.0`` leaves the value unchanged. Stored as a
``rescale_array`` resource (``kind="rescale"``).
"""

rows: np.ndarray
cols: np.ndarray
data: np.ndarray
flip: Optional[np.ndarray] = None
rescale: Optional[np.ndarray] = None

def __post_init__(self):
self.rows = np.asarray(self.rows)
Expand Down Expand Up @@ -134,6 +144,12 @@ def __post_init__(self):
raise ValueError(
f"`flip` shape {self.flip.shape} doesn't match `rows` shape {self.rows.shape}"
)
if self.rescale is not None:
self.rescale = np.asarray(self.rescale, dtype=np.float32)
if self.rescale.shape != self.rows.shape:
raise ValueError(
f"`rescale` shape {self.rescale.shape} doesn't match `rows` shape {self.rows.shape}"
)


def create_datapackage_from_entries(
Expand Down
6 changes: 5 additions & 1 deletion src/bw_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def dictionary_formatter(row: dict) -> tuple:
row.get("maximum", np.nan),
row.get("negative", False),
row.get("flip", False),
row.get("rescale", 1.0),
)


Expand All @@ -73,11 +74,13 @@ def resolve_dict_iterator(iterator: Any, nrows: int = None) -> tuple:
data = (dictionary_formatter(row) for row in iterator)
array = create_structured_array(
data,
INDICES_DTYPE + [("amount", np.float32)] + UNCERTAINTY_DTYPE + [("flip", bool)],
INDICES_DTYPE + [("amount", np.float32)] + UNCERTAINTY_DTYPE + [("flip", bool), ("rescale", np.float32)],
nrows=nrows,
sort=True,
sort_fields=sort_fields,
)
rescale = array["rescale"]
rescale_array = rescale if (rescale != 1.0).any() else None
return (
array["amount"],
# Not repacking fields would cause this multi-field index to return a view
Expand All @@ -98,6 +101,7 @@ def resolve_dict_iterator(iterator: Any, nrows: int = None) -> tuple:
]
),
array["flip"],
rescale_array,
)


Expand Down
Loading
Loading