From 0446d21ecaa24219ddf8eaad92f61a7cf471e7b7 Mon Sep 17 00:00:00 2001
From: Kristjan Vilgo
Date: Fri, 12 Jun 2026 17:36:05 +0300
Subject: [PATCH] fix: enforce triplets string-or-null invariant in tools
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #55

ID/KEY/VALUE columns are always strings or null — never raw numbers or
stringified None. Three leaks fixed:

- pandas tableview_to_triplets: astype(str) turned melt's NaN holes into
  literal "nan" strings (older pandas) or left float nan objects mixed
  in (pandas 3). It now casts to the nullable "string" dtype — numbers
  become text, nulls stay null, matching the polars engine's Utf8 cast.
- pandas set_triplets_value_by_key(_and_id): raw value assignment let
  ints into VALUE (and crashed on pandas 3 str-dtype columns). Values
  are now normalized via str(); None stays null.
- polars set_triplets_value_by_key(_and_id): str(None) stored the
  literal string "None"; it is now a typed null literal.

Adds engine-parity tests for roundtrip null handling, number
stringification and set-value normalization.
--- tests/test_tools.py | 44 +++++++++++++++++++++++++++++++++ triplets/tools/pandas_engine.py | 13 +++++++--- triplets/tools/polars_engine.py | 9 +++++-- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/tests/test_tools.py b/tests/test_tools.py index 210bcbf..01d745b 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -348,6 +348,50 @@ def test_all_aliases_resolve(self): assert callable(getattr(triplets.tools, new_name)), new_name +class TestTripletsStringInvariant: + """ID/KEY/VALUE are always strings or null — never mixed with numbers (issue #55).""" + + def test_pandas_roundtrip_keeps_nulls_null(self): + tableview = pandas.DataFrame( + {"ID": ["a", "b"], "Type": ["T", "T"], "x.y": ["1", None]} + ).set_index("ID") + trip = triplets.tools.tableview_to_triplets(tableview, engine="pandas") + hole = trip[(trip["ID"] == "b") & (trip["KEY"] == "x.y")]["VALUE"] + assert hole.isna().all() # null stays null, not "nan" + non_null = trip["VALUE"].dropna() + assert all(isinstance(v, str) for v in non_null) + + def test_pandas_roundtrip_stringifies_numbers(self, svedala_eq): + tableview = svedala_eq.tableview_by_type("ACLineSegment", string_to_number=True) + trip = triplets.tools.tableview_to_triplets(tableview, engine="pandas") + non_null = trip["VALUE"].dropna() + assert all(isinstance(v, str) for v in non_null) + + @pytest.mark.parametrize("engine", ["pandas", "polars"]) + def test_set_value_int_becomes_string(self, engine): + frame = pandas.DataFrame({"ID": ["a"], "KEY": ["k"], "VALUE": ["old"], "INSTANCE_ID": ["i"]}) + if engine == "polars": + polars = pytest.importorskip("polars") + data = polars.from_pandas(frame) + result = triplets.tools.set_triplets_value_by_key(data, "k", 42) + assert result["VALUE"][0] == "42" + else: + triplets.tools.set_triplets_value_by_key(frame, "k", 42) + assert frame["VALUE"].iloc[0] == "42" + + @pytest.mark.parametrize("engine", ["pandas", "polars"]) + def test_set_value_none_stays_null(self, engine): + frame = 
pandas.DataFrame({"ID": ["a"], "KEY": ["k"], "VALUE": ["old"], "INSTANCE_ID": ["i"]}) + if engine == "polars": + polars = pytest.importorskip("polars") + data = polars.from_pandas(frame) + result = triplets.tools.set_triplets_value_by_key(data, "k", None) + assert result["VALUE"][0] is None # not the string "None" + else: + triplets.tools.set_triplets_value_by_key(frame, "k", None) + assert pandas.isna(frame["VALUE"].iloc[0]) + + class TestConvenienceAliases: """First-class aliases (no deprecation) that group functions by prefix for IDE autocomplete.""" diff --git a/triplets/tools/pandas_engine.py b/triplets/tools/pandas_engine.py index 109f4f4..06145ff 100644 --- a/triplets/tools/pandas_engine.py +++ b/triplets/tools/pandas_engine.py @@ -532,6 +532,11 @@ def types_dict(data): return types_dictionary +def _string_or_none(value): + """VALUE entries are strings or null — never the string "None" or raw numbers.""" + return None if value is None else str(value) + + def set_triplets_value_by_key(data, key, value): """Set the value for all instances of a specified key. 
@@ -553,7 +558,7 @@ def set_triplets_value_by_key(data, key, value): -------- >>> data.set_triplets_value_by_key("label", "new_label") """ - data.loc[data[data.KEY == key].index, "VALUE"] = value # TODO add changes to change DataFrame + data.loc[data[data.KEY == key].index, "VALUE"] = _string_or_none(value) # TODO add changes to change DataFrame def set_triplets_value_by_key_and_id(data, key, value, id): @@ -574,7 +579,7 @@ def set_triplets_value_by_key_and_id(data, key, value, id): -------- >>> data.set_triplets_value_by_key_and_id("label", "new_label", "uuid1") """ - data.loc[data[(data.ID == id) & (data.KEY == key)].index, "VALUE"] = value + data.loc[data[(data.ID == id) & (data.KEY == key)].index, "VALUE"] = _string_or_none(value) def triplets_to_tableviews(triplet_df, multivalue=False): @@ -682,7 +687,9 @@ def _ensure_list(val): triplet_df["VALUE"] = triplet_df["VALUE"].apply(_ensure_list) triplet_df = triplet_df.explode("VALUE") - return triplet_df.astype(str) + # nullable string dtype: numbers become text, melt's NaN holes stay null + # (plain astype(str) made them literal "nan" strings / mixed nan objects) + return triplet_df.astype("string") def update_triplets_from_triplets(data, update_data, update=True, add=True): diff --git a/triplets/tools/polars_engine.py b/triplets/tools/polars_engine.py index 3e97096..6c40ad8 100644 --- a/triplets/tools/polars_engine.py +++ b/triplets/tools/polars_engine.py @@ -278,11 +278,16 @@ def filter_triplets(data, ID=None, KEY=None, VALUE=None, INSTANCE_ID=None, regex return data.filter(expr) +def _value_literal(value): + """VALUE entries are strings or null — never the string \"None\" or raw numbers.""" + return pl.lit(None, dtype=pl.Utf8) if value is None else pl.lit(str(value)) + + def set_triplets_value_by_key(data, key, value): """Set VALUE for all rows with a given KEY (in-place mutation via reassignment).""" return data.with_columns( pl.when(pl.col("KEY") == key) - .then(pl.lit(str(value))) + 
.then(_value_literal(value)) .otherwise(pl.col("VALUE")) .alias("VALUE") ) @@ -292,7 +297,7 @@ def set_triplets_value_by_key_and_id(data, key, value, id): """Set VALUE for a specific KEY and ID combination.""" return data.with_columns( pl.when((pl.col("KEY") == key) & (pl.col("ID") == id)) - .then(pl.lit(str(value))) + .then(_value_literal(value)) .otherwise(pl.col("VALUE")) .alias("VALUE") )