# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Purpose: expose export_to_cimxml for polars and DuckDB inputs and cover both with tests.
#
# tests/test_tools.py
#   * Adds test_polars_input_exports and test_duckdb_input_exports.
#   * Each test calls pytest.importorskip for its library, calls export_to_cimxml with
#     rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1, export_type="xml_per_instance" and
#     export_to_memory=True, rewinds the first returned object with seek(0), re-reads it
#     through pandas.read_RDF and asserts that the "ACLineSegment" entry of
#     get_types_count() equals the one of the svedala_eq fixture.
#   * The DuckDB test registers svedala_eq as "source" and copies it into a table named
#     "triplets" before calling con.export_to_cimxml.
#   * NOTE(review): test_duckdb_input_exports requests tmp_path but the visible body never
#     uses it - confirm whether the fixture can be dropped.
#   * NOTE(review): the connection returned by duckdb.connect() is not closed in the visible
#     test body - confirm whether explicit cleanup is wanted.
#
# triplets/_accessor.py
#   * Appends "export_to_cimxml" to DUCKDB_EXPORT_METHODS.
#
# triplets/export/__init__.py
#   * In export_to_cimxml, after _check_columns(data) and before get_cimxml_engine(engine):
#     when _is_polars(data) is true, a debug message is logged and data is replaced by
#     data.to_pandas(use_pyarrow_extension_array=True).
#   * NOTE(review): use_pyarrow_extension_array=True presumably yields Arrow-backed pandas
#     columns rather than NumPy/object ones - TODO confirm every cimxml engine accepts them.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed into spaces;
# the original line structure presumably has to be restored before the patch can be applied.
diff --git a/tests/test_tools.py b/tests/test_tools.py index bcf0658..210bcbf 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -455,6 +455,45 @@ def test_missing_columns_raise(self, svedala_eq, tmp_path): with pytest.raises(ValueError, match="missing columns.*VALUE"): triplets.export.export_to_nquads(broken, str(tmp_path / "x.nq")) + def test_polars_input_exports(self, svedala_eq): + """Polars input converts to pandas at the orchestrator and exports. + + Note: the mixed string/int VALUE problem from issue #50 cannot occur + with polars input — polars columns are strictly typed, so a mixed + object VALUE column is rejected at frame construction already. + """ + polars = pytest.importorskip("polars") + from triplets.export_schema import schemas + result = polars.from_pandas(svedala_eq).triplets.export_to_cimxml( + rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1, + export_type="xml_per_instance", + export_to_memory=True, + ) + result[0].seek(0) + reimported = pandas.read_RDF([result[0]]) + assert reimported.get_types_count()["ACLineSegment"] == svedala_eq.get_types_count()["ACLineSegment"] + + def test_duckdb_input_exports(self, svedala_eq, tmp_path): + """DuckDB exports fetch the triplets table into pandas and export. + + As with polars, DuckDB columns are strictly typed (VALUE is VARCHAR), + so the mixed string/int case from issue #50 cannot occur here. 
+ """ + duckdb = pytest.importorskip("duckdb") + from triplets.export_schema import schemas + con = duckdb.connect() + con.register("source", svedala_eq) + con.execute("CREATE TABLE triplets AS SELECT * FROM source") + + result = con.export_to_cimxml( + rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1, + export_type="xml_per_instance", + export_to_memory=True, + ) + result[0].seek(0) + reimported = pandas.read_RDF([result[0]]) + assert reimported.get_types_count()["ACLineSegment"] == svedala_eq.get_types_count()["ACLineSegment"] + def test_engines_produce_identical_output(self, svedala_eq): require_cimxml_engine("cython_pugixml") from triplets.export_schema import schemas diff --git a/triplets/_accessor.py b/triplets/_accessor.py index 8d0eac4..2d28e24 100644 --- a/triplets/_accessor.py +++ b/triplets/_accessor.py @@ -83,7 +83,7 @@ "references_to", "references_from", ] -DUCKDB_EXPORT_METHODS = ["export_to_excel", "export_to_csv", "export_to_nquads"] +DUCKDB_EXPORT_METHODS = ["export_to_excel", "export_to_csv", "export_to_nquads", "export_to_cimxml"] def _delegate(module, name): diff --git a/triplets/export/__init__.py b/triplets/export/__init__.py index 49606e8..2b38dae 100644 --- a/triplets/export/__init__.py +++ b/triplets/export/__init__.py @@ -204,6 +204,10 @@ def export_to_cimxml(data, init_time = start_time _check_columns(data) + if _is_polars(data): + # the per-instance pipeline is pandas (groupby + engine contract) + logger.debug("format=cimxml: polars input → pandas") + data = data.to_pandas(use_pyarrow_extension_array=True) engine_name, engine_module = get_cimxml_engine(engine) generate = engine_module.generate_xml