Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,45 @@ def test_missing_columns_raise(self, svedala_eq, tmp_path):
with pytest.raises(ValueError, match="missing columns.*VALUE"):
triplets.export.export_to_nquads(broken, str(tmp_path / "x.nq"))

def test_polars_input_exports(self, svedala_eq):
    """Export a polars frame to CIMXML in memory and re-import it.

    The pandas fixture is converted with ``polars.from_pandas``, exported
    through the ``triplets`` accessor, read back with ``pandas.read_RDF``
    and the ``ACLineSegment`` type count is compared with the fixture's.

    Note: the mixed string/int VALUE problem from issue #50 cannot occur
    with polars input — polars columns are strictly typed, so a mixed
    object VALUE column is rejected at frame construction already.
    """
    polars = pytest.importorskip("polars")
    from triplets.export_schema import schemas

    polars_frame = polars.from_pandas(svedala_eq)
    exported = polars_frame.triplets.export_to_cimxml(
        rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1,
        export_type="xml_per_instance",
        export_to_memory=True,
    )

    # Rewind the first in-memory file before handing it to the reader.
    first_file = exported[0]
    first_file.seek(0)
    reimported = pandas.read_RDF([first_file])

    actual_count = reimported.get_types_count()["ACLineSegment"]
    expected_count = svedala_eq.get_types_count()["ACLineSegment"]
    assert actual_count == expected_count

def test_duckdb_input_exports(self, svedala_eq, tmp_path):
    """DuckDB exports fetch the triplets table into pandas and export.

    The pandas fixture is registered on an in-memory DuckDB connection,
    copied into a ``triplets`` table, exported to CIMXML in memory and
    re-imported; the ``ACLineSegment`` type count must match the fixture.

    As with polars, DuckDB columns are strictly typed (VALUE is VARCHAR),
    so the mixed string/int case from issue #50 cannot occur here.

    ``tmp_path`` is requested but not used by this test; it is kept so the
    signature stays unchanged.
    """
    duckdb = pytest.importorskip("duckdb")
    from triplets.export_schema import schemas

    # Scope the connection so it is closed even if the export raises.
    with duckdb.connect() as con:
        con.register("source", svedala_eq)
        con.execute("CREATE TABLE triplets AS SELECT * FROM source")

        result = con.export_to_cimxml(
            rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1,
            export_type="xml_per_instance",
            export_to_memory=True,
        )

    # The export was requested in memory, so it is read after the close.
    result[0].seek(0)
    reimported = pandas.read_RDF([result[0]])
    assert reimported.get_types_count()["ACLineSegment"] == svedala_eq.get_types_count()["ACLineSegment"]

def test_engines_produce_identical_output(self, svedala_eq):
require_cimxml_engine("cython_pugixml")
from triplets.export_schema import schemas
Expand Down
2 changes: 1 addition & 1 deletion triplets/_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"references_to", "references_from",
]

# NOTE(review): these two assignments are the old and new side of a one-line
# diff ("1 addition & 1 deletion"); the second one adds "export_to_cimxml"
# and supersedes the first.
# Presumably the export method names made available on DuckDB connections —
# confirm against the accessor registration code.
DUCKDB_EXPORT_METHODS = ["export_to_excel", "export_to_csv", "export_to_nquads"]
DUCKDB_EXPORT_METHODS = ["export_to_excel", "export_to_csv", "export_to_nquads", "export_to_cimxml"]


def _delegate(module, name):
Expand Down
4 changes: 4 additions & 0 deletions triplets/export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ def export_to_cimxml(data,
init_time = start_time

_check_columns(data)
if _is_polars(data):
# the per-instance pipeline is pandas (groupby + engine contract)
logger.debug("format=cimxml: polars input → pandas")
data = data.to_pandas(use_pyarrow_extension_array=True)
engine_name, engine_module = get_cimxml_engine(engine)
generate = engine_module.generate_xml

Expand Down
Loading