Haigutus · Haigutus · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -542,6 +542,102 @@ def test_nquads_export(self, pl_eq, tmp_path):
         assert "<urn:uuid:" in lines[0]
 
 
+class TestNquadsDatatypes:
+    """With an export schema, literal attributes get xsd datatype annotations."""
+
+    @pytest.fixture(scope="class")
+    def nquads_lines(self, svedala_eq, tmp_path_factory):
+        """Export svedala_eq once per class using the CGMES 3.0 schema; return the output lines."""
+        from triplets.export_schema import schemas
+        output = str(tmp_path_factory.mktemp("nq") / "typed.nq")
+        triplets.export.export_to_nquads(svedala_eq, output, rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1)
+        with open(output, encoding="utf-8") as f:  # N-Quads is UTF-8 by spec, not locale-encoded
+            return f.readlines()
+
+    def test_numeric_literal_gets_datatype(self, nquads_lines):
+        """Every Conductor.length quad carries an xsd:float annotation."""
+        length_lines = [line for line in nquads_lines if "Conductor.length" in line]
+        assert length_lines
+        assert all('^^<http://www.w3.org/2001/XMLSchema#float>' in line for line in length_lines)
+
+    def test_reference_keys_stay_iris(self, nquads_lines):
+        """xsd:anyURI keys (e.g. Model.DependentOn) are references, not typed literals."""
+        dep_lines = [line for line in nquads_lines if "Model.DependentOn" in line]
+        assert dep_lines
+        assert all("^^" not in line for line in dep_lines)
+
+    def test_rdflib_parses_export(self, nquads_lines, tmp_path):
+        """rdflib parses the export and exposes typed literals as native Python values."""
+        rdflib = pytest.importorskip("rdflib")
+        path = tmp_path / "validate.nq"
+        path.write_text("".join(nquads_lines), encoding="utf-8")
+        dataset = rdflib.Dataset()
+        dataset.parse(str(path), format="nquads")
+        assert len(dataset) == len(nquads_lines)
+
+        # typed literals round-trip through rdflib with the right python type
+        length_predicate = rdflib.URIRef("http://iec.ch/TC57/CIM100#Conductor.length")
+        lengths = [obj for _, _, obj, _ in dataset.quads((None, length_predicate, None, None))]
+        assert lengths
+        for literal in lengths:
+            assert literal.datatype == rdflib.XSD.float
+            assert isinstance(literal.toPython(), float)
+
+    def test_references_resolve_within_dataset(self, svedala_eq, nquads_lines, tmp_path):
+        """Every urn:uuid reference resolves to a subject — except the references
+        the source data itself knows are dangling (boundary objects, other models)."""
+        rdflib = pytest.importorskip("rdflib")
+        from triplets import cgmes_tools
+        path = tmp_path / "refs.nq"
+        path.write_text("".join(nquads_lines), encoding="utf-8")
+        dataset = rdflib.Dataset()
+        dataset.parse(str(path), format="nquads")
+
+        subjects = set()
+        uuid_objects = set()
+        for s, _, o, _ in dataset.quads((None, None, None, None)):
+            subjects.add(str(s))
+            if isinstance(o, rdflib.URIRef) and str(o).startswith("urn:uuid:"):
+                uuid_objects.add(str(o))
+        unresolved = {o.removeprefix("urn:uuid:") for o in uuid_objects - subjects}
+
+        dangling = cgmes_tools.get_dangling_references(svedala_eq, detailed=True)
+        known_dangling = set(dangling["VALUE_FROM"].astype(str))
+        assert unresolved, "single EQ file must have boundary references"
+        assert unresolved <= known_dangling, \
+            f"references neither resolved nor known-dangling: {sorted(unresolved - known_dangling)[:5]}"
+
+    def test_string_literal_stays_plain(self, nquads_lines):
+        """xsd:string is the RDF 1.1 default datatype, so string literals get no annotation."""
+        name_lines = [line for line in nquads_lines if "IdentifiedObject.name>" in line]
+        assert name_lines
+        assert all("^^" not in line for line in name_lines)
+
+    def test_mrid_is_literal_not_reference(self, nquads_lines):
+        """mRID is a string attribute by schema; the UUID heuristic must not make it a urn:uuid reference."""
+        mrid_lines = [line for line in nquads_lines if "IdentifiedObject.mRID>" in line]
+        assert mrid_lines
+        for line in mrid_lines:
+            obj = line.split("> ", 2)[2]  # object + graph part after subject and predicate
+            assert obj.startswith('"'), line
+
+    def test_without_schema_no_datatypes(self, svedala_eq, tmp_path):
+        """Without rdf_map the export contains no datatype annotations at all."""
+        output = str(tmp_path / "untyped.nq")
+        triplets.export.export_to_nquads(svedala_eq, output)
+        with open(output, encoding="utf-8") as f:
+            assert "^^<" not in f.read()
+
+    def test_polars_engine_matches_pandas(self, svedala_eq, tmp_path, nquads_lines):
+        """A polars input frame yields the same quads as the pandas one (order-insensitive)."""
+        polars = pytest.importorskip("polars")
+        from triplets.export_schema import schemas
+        output = str(tmp_path / "typed_pl.nq")
+        triplets.export.export_to_nquads(polars.from_pandas(svedala_eq), output, rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1)
+        with open(output, encoding="utf-8") as f:
+            assert sorted(f.readlines()) == sorted(nquads_lines)
+
+
 # ── Roundtrip test (export CIM XML → reimport → compare) ────────────────────
 
 class TestCimxmlRoundtrip:

diff --git a/triplets/export/nquads_pandas.py b/triplets/export/nquads_pandas.py
@@ -18,10 +18,11 @@ def export_to_nquads(data, path, rdf_map=None):
     path : str
         Output file path (.nq).
     rdf_map : dict or str, optional
-        Export schema for proper enum/association detection.
-        If None, enumerations won't get namespace (exported as literals).
+        Export schema for proper enum/association detection and literal
+        datatype annotations ("400"^^<...XMLSchema#float>). If None,
+        enumerations won't get namespace and literals stay untyped.
     """
-    enum_keys, key_namespaces = build_key_metadata(rdf_map) if rdf_map else (set(), {})
+    enum_keys, key_namespaces, key_datatypes = build_key_metadata(rdf_map) if rdf_map else (set(), {}, {})
 
     id_col = data["ID"].astype(str)
     key_col = data["KEY"].astype(str)
@@ -31,7 +32,7 @@ def export_to_nquads(data, path, rdf_map=None):
     subjects = id_col.apply(make_subject)
     predicates = key_col.apply(lambda k: make_predicate(k, key_namespaces))
     objects = pandas.Series(
-        [make_object(k, v, enum_keys) for k, v in zip(key_col, val_col)],
+        [make_object(k, v, enum_keys, key_datatypes) for k, v in zip(key_col, val_col)],
         index=data.index,
     )
     graphs = inst_col.apply(make_graph)

diff --git a/triplets/export/nquads_polars.py b/triplets/export/nquads_polars.py
@@ -22,16 +22,17 @@ def export_to_nquads(data, path, rdf_map=None):
     path : str
         Output file path (.nq).
     rdf_map : dict or str, optional
-        Export schema for proper enum/association detection.
+        Export schema for proper enum/association detection and literal
+        datatype annotations ("400"^^<...XMLSchema#float>).
     """
-    enum_keys, key_namespaces = build_key_metadata(rdf_map) if rdf_map else (set(), {})
+    enum_keys, key_namespaces, key_datatypes = build_key_metadata(rdf_map) if rdf_map else (set(), {}, {})
 
     # Build quads row by row (complex classification can't be fully vectorized)
     quads = []
     for row in data.iter_rows(named=True):
         s = make_subject(str(row["ID"]))
         p = make_predicate(str(row["KEY"]), key_namespaces)
-        o = make_object(str(row["KEY"]), str(row["VALUE"]), enum_keys)
+        o = make_object(str(row["KEY"]), str(row["VALUE"]), enum_keys, key_datatypes)
         g = make_graph(str(row["INSTANCE_ID"]))
         quads.append(f"{s} {p} {o} {g} .")
 

diff --git a/triplets/export/nquads_utils.py b/triplets/export/nquads_utils.py
@@ -8,12 +8,13 @@
 
 CIM_NS = "http://iec.ch/TC57/CIM100#"
 RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+XSD_NS = "http://www.w3.org/2001/XMLSchema#"
 
 UUID_RE = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$')
 
 
 def build_key_metadata(rdf_map):
-    """Extract enum keys and key→namespace mapping from export schema.
+    """Extract enum keys, key→namespace, and key→datatype mappings from export schema.
 
     Parameters
     ----------
@@ -26,13 +27,19 @@ def build_key_metadata(rdf_map):
         KEY names whose values are enumerations (need namespace on VALUE).
     key_namespaces : dict
         KEY name → namespace URI for predicate construction.
+    key_datatypes : dict
+        KEY name → full xsd datatype URI (from the schema's "xsd:type",
+        e.g. "xsd:float" → "http://www.w3.org/2001/XMLSchema#float").
+        A key present here is a literal attribute by schema. xsd:string
+        keys map to None: literal, but no annotation (RDF 1.1 default).
     """
     if not isinstance(rdf_map, dict):
         with open(str(rdf_map)) as f:
             rdf_map = json.load(f)
 
     enum_keys = set()
     key_namespaces = {}
+    key_datatypes = {}
 
     for profile_name, profile_data in rdf_map.items():
         if not isinstance(profile_data, dict):
@@ -42,13 +49,19 @@ def build_key_metadata(rdf_map):
                 continue
             prop_type = prop_data.get("type")
             namespace = prop_data.get("namespace", CIM_NS)
+            xsd_type = prop_data.get("xsd:type")
 
             if prop_type == "Enumeration":
                 enum_keys.add(prop_name)
             if namespace:
                 key_namespaces[prop_name] = namespace
+            if xsd_type and xsd_type.startswith("xsd:"):
+                datatype = xsd_type.removeprefix("xsd:")
+                if datatype == "anyURI":
+                    continue  # references (e.g. Model.DependentOn) — keep IRI handling
+                key_datatypes[prop_name] = None if datatype == "string" else f"{XSD_NS}{datatype}"
 
-    return enum_keys, key_namespaces
+    return enum_keys, key_namespaces, key_datatypes
 
 
 def make_subject(id_val):
@@ -68,13 +81,16 @@ def make_predicate(key, key_namespaces=None):
     return f"<{ns}{key}>"
 
 
-def make_object(key, value, enum_keys=None):
+def make_object(key, value, enum_keys=None, key_datatypes=None):
     """Convert VALUE to object (URI or literal).
 
     Rules:
     - Type row → <namespace#ClassName>
     - Already starts with http/https/urn → <value> (pass through)
     - Enum KEY → <namespace#EnumValue>
+    - KEY with schema datatype → "literal"^^<xsd type> (plain for xsd:string);
+      takes precedence over the UUID heuristic (e.g. IdentifiedObject.mRID is
+      a string attribute, not a reference)
     - UUID pattern → <urn:uuid:value>
     - Everything else → "literal" (with escaping)
     """
@@ -91,6 +107,12 @@ def make_object(key, value, enum_keys=None):
     if enum_keys and key in enum_keys:
         return f"<{CIM_NS}{value}>"
 
+    # Literal attribute by schema — annotate with its xsd datatype
+    if key_datatypes and key in key_datatypes:
+        escaped = value.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
+        datatype = key_datatypes[key]
+        return f'"{escaped}"^^<{datatype}>' if datatype else f'"{escaped}"'
+
     # UUID reference
     if UUID_RE.match(value):
         return f"<urn:uuid:{value}>"