Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,50 @@ def test_all_aliases_resolve(self):
assert callable(getattr(triplets.tools, new_name)), new_name


class TestTripletsStringInvariant:
    """Guard the rule that ID/KEY/VALUE hold only strings or nulls, never numbers (issue #55)."""

    @staticmethod
    def _single_row_frame():
        """Return a one-row triplet frame whose only KEY is "k" and whose VALUE is "old"."""
        return pandas.DataFrame({"ID": ["a"], "KEY": ["k"], "VALUE": ["old"], "INSTANCE_ID": ["i"]})

    def test_pandas_roundtrip_keeps_nulls_null(self):
        # Row "b" deliberately has no value in the "x.y" column.
        columns = {"ID": ["a", "b"], "Type": ["T", "T"], "x.y": ["1", None]}
        table = pandas.DataFrame(columns).set_index("ID")

        result = triplets.tools.tableview_to_triplets(table, engine="pandas")

        empty_cell = (result["ID"] == "b") & (result["KEY"] == "x.y")
        assert result[empty_cell]["VALUE"].isna().all()  # null stays null, not "nan"

        # Whatever is not null must be text.
        for item in result["VALUE"].dropna():
            assert isinstance(item, str)

    def test_pandas_roundtrip_stringifies_numbers(self, svedala_eq):
        # string_to_number=True is requested so the table view is built with number conversion on.
        numeric_table = svedala_eq.tableview_by_type("ACLineSegment", string_to_number=True)
        result = triplets.tools.tableview_to_triplets(numeric_table, engine="pandas")
        for item in result["VALUE"].dropna():
            assert isinstance(item, str)

    @pytest.mark.parametrize("engine", ["pandas", "polars"])
    def test_set_value_int_becomes_string(self, engine):
        frame = self._single_row_frame()

        if engine != "polars":
            # The pandas call's return value is unused; the frame itself is inspected.
            triplets.tools.set_triplets_value_by_key(frame, "k", 42)
            assert frame["VALUE"].iloc[0] == "42"
            return

        # The polars branch inspects the returned frame; skipped when polars is missing.
        polars = pytest.importorskip("polars")
        updated = triplets.tools.set_triplets_value_by_key(polars.from_pandas(frame), "k", 42)
        assert updated["VALUE"][0] == "42"

    @pytest.mark.parametrize("engine", ["pandas", "polars"])
    def test_set_value_none_stays_null(self, engine):
        frame = self._single_row_frame()

        if engine != "polars":
            triplets.tools.set_triplets_value_by_key(frame, "k", None)
            assert pandas.isna(frame["VALUE"].iloc[0])
            return

        polars = pytest.importorskip("polars")
        updated = triplets.tools.set_triplets_value_by_key(polars.from_pandas(frame), "k", None)
        assert updated["VALUE"][0] is None  # not the string "None"


class TestConvenienceAliases:
"""First-class aliases (no deprecation) that group functions by prefix for IDE autocomplete."""

Expand Down
13 changes: 10 additions & 3 deletions triplets/tools/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,11 @@ def types_dict(data):
return types_dictionary


def _string_or_none(value):
    """Normalise a value before it is written into the VALUE column.

    VALUE entries are strings or null — never the string "None", the string
    "nan", or raw numbers.

    Parameters
    ----------
    value : object
        The value to store.

    Returns
    -------
    str or None
        ``None`` when ``value`` is ``None`` or a scalar that ``pandas.isna``
        reports as missing (e.g. ``float("nan")``, ``pandas.NA``,
        ``pandas.NaT``); otherwise ``str(value)``.
    """
    if value is None:
        return None

    # Imported locally so this helper does not depend on how the module aliases pandas.
    import pandas

    # pandas.isna returns an array for list-likes, so only scalars are probed;
    # non-scalars fall through to str() exactly as before.
    if pandas.api.types.is_scalar(value) and pandas.isna(value):
        return None
    return str(value)


def set_triplets_value_by_key(data, key, value):
"""Set the value for all instances of a specified key.

Expand All @@ -553,7 +558,7 @@ def set_triplets_value_by_key(data, key, value):
--------
>>> data.set_triplets_value_by_key("label", "new_label")
"""
data.loc[data[data.KEY == key].index, "VALUE"] = value # TODO add changes to change DataFrame
data.loc[data[data.KEY == key].index, "VALUE"] = _string_or_none(value) # TODO add changes to change DataFrame


def set_triplets_value_by_key_and_id(data, key, value, id):
Expand All @@ -574,7 +579,7 @@ def set_triplets_value_by_key_and_id(data, key, value, id):
--------
>>> data.set_triplets_value_by_key_and_id("label", "new_label", "uuid1")
"""
data.loc[data[(data.ID == id) & (data.KEY == key)].index, "VALUE"] = value
data.loc[data[(data.ID == id) & (data.KEY == key)].index, "VALUE"] = _string_or_none(value)


def triplets_to_tableviews(triplet_df, multivalue=False):
Expand Down Expand Up @@ -682,7 +687,9 @@ def _ensure_list(val):
triplet_df["VALUE"] = triplet_df["VALUE"].apply(_ensure_list)
triplet_df = triplet_df.explode("VALUE")

return triplet_df.astype(str)
# nullable string dtype: numbers become text, melt's NaN holes stay null
# (plain astype(str) made them literal "nan" strings / mixed nan objects)
return triplet_df.astype("string")


def update_triplets_from_triplets(data, update_data, update=True, add=True):
Expand Down
9 changes: 7 additions & 2 deletions triplets/tools/polars_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,16 @@ def filter_triplets(data, ID=None, KEY=None, VALUE=None, INSTANCE_ID=None, regex
return data.filter(expr)


def _value_literal(value):
    """Build the polars literal that is written into the VALUE column.

    VALUE entries are strings or null — never the string "None", the string
    "nan", or raw numbers.

    Parameters
    ----------
    value : object
        The value to store.

    Returns
    -------
    polars expression
        ``pl.lit(None, dtype=pl.Utf8)`` when ``value`` is ``None`` or a float
        NaN; otherwise ``pl.lit(str(value))``.
    """
    import math

    # A float NaN would otherwise be stringified to the literal text "nan".
    is_missing = value is None or (isinstance(value, float) and math.isnan(value))
    if is_missing:
        # The null literal is typed Utf8 explicitly rather than left untyped.
        return pl.lit(None, dtype=pl.Utf8)
    return pl.lit(str(value))


def set_triplets_value_by_key(data, key, value):
"""Set VALUE for all rows with a given KEY (in-place mutation via reassignment)."""
return data.with_columns(
pl.when(pl.col("KEY") == key)
.then(pl.lit(str(value)))
.then(_value_literal(value))
.otherwise(pl.col("VALUE"))
.alias("VALUE")
)
Expand All @@ -292,7 +297,7 @@ def set_triplets_value_by_key_and_id(data, key, value, id):
"""Set VALUE for a specific KEY and ID combination."""
return data.with_columns(
pl.when((pl.col("KEY") == key) & (pl.col("ID") == id))
.then(pl.lit(str(value)))
.then(_value_literal(value))
.otherwise(pl.col("VALUE"))
.alias("VALUE")
)
Expand Down
Loading