Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,102 @@ def test_nquads_export(self, pl_eq, tmp_path):
assert "<urn:uuid:" in lines[0]


class TestNquadsDatatypes:
    """N-Quads export with an export schema: literals carry xsd datatypes.

    The class-scoped ``nquads_lines`` fixture exports ``svedala_eq`` once with
    the CGMES 3.0 export schema; most tests inspect the resulting lines.
    """

    @pytest.fixture(scope="class")
    def nquads_lines(self, svedala_eq, tmp_path_factory):
        """Export ``svedala_eq`` with the export schema and return the file's lines."""
        from triplets.export_schema import schemas

        target_dir = tmp_path_factory.mktemp("nq")
        output = str(target_dir / "typed.nq")
        triplets.export.export_to_nquads(
            svedala_eq,
            output,
            rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1,
        )
        with open(output) as handle:
            return list(handle)

    def test_numeric_literal_gets_datatype(self, nquads_lines):
        """Every ``Conductor.length`` line carries the xsd:float annotation."""
        float_marker = '^^<http://www.w3.org/2001/XMLSchema#float>'
        length_lines = [line for line in nquads_lines if "Conductor.length" in line]
        assert length_lines
        assert all(float_marker in line for line in length_lines)

    def test_reference_keys_stay_iris(self, nquads_lines):
        """``Model.DependentOn`` lines must not contain a datatype marker."""
        # xsd:anyURI keys (e.g. Model.DependentOn) are references, not typed literals
        dep_lines = [line for line in nquads_lines if "Model.DependentOn" in line]
        assert dep_lines
        assert all("^^" not in line for line in dep_lines)

    def test_rdflib_parses_export(self, nquads_lines, tmp_path):
        """rdflib parses the export and sees xsd:float literals as Python floats."""
        rdflib = pytest.importorskip("rdflib")
        path = tmp_path / "validate.nq"
        path.write_text("".join(nquads_lines))

        dataset = rdflib.Dataset()
        dataset.parse(str(path), format="nquads")
        assert len(dataset) == len(nquads_lines)

        # typed literals round-trip through rdflib with the right python type
        length_predicate = rdflib.URIRef("http://iec.ch/TC57/CIM100#Conductor.length")
        pattern = (None, length_predicate, None, None)
        lengths = [quad[2] for quad in dataset.quads(pattern)]
        assert lengths
        for literal in lengths:
            assert literal.datatype == rdflib.XSD.float
            assert isinstance(literal.toPython(), float)

    def test_references_resolve_within_dataset(self, svedala_eq, nquads_lines, tmp_path):
        """Each urn:uuid object is either a subject in the dataset or a
        reference that ``get_dangling_references`` reports for the source data."""
        rdflib = pytest.importorskip("rdflib")
        from triplets import cgmes_tools

        path = tmp_path / "refs.nq"
        path.write_text("".join(nquads_lines))
        dataset = rdflib.Dataset()
        dataset.parse(str(path), format="nquads")

        # Collect every subject, and every object that is a urn:uuid IRI.
        subjects = set()
        uuid_objects = set()
        for subj, _, obj, _ in dataset.quads((None, None, None, None)):
            subjects.add(str(subj))
            if not isinstance(obj, rdflib.URIRef):
                continue
            obj_text = str(obj)
            if obj_text.startswith("urn:uuid:"):
                uuid_objects.add(obj_text)
        missing = uuid_objects - subjects
        unresolved = {ref.removeprefix("urn:uuid:") for ref in missing}

        dangling = cgmes_tools.get_dangling_references(svedala_eq, detailed=True)
        known_dangling = set(dangling["VALUE_FROM"].astype(str))

        assert unresolved, "single EQ file must have boundary references"
        # Subset check: nothing unresolved may fall outside the known-dangling set.
        assert unresolved <= known_dangling, \
            f"references neither resolved nor known-dangling: {sorted(unresolved - known_dangling)[:5]}"

    def test_string_literal_stays_plain(self, nquads_lines):
        """``IdentifiedObject.name`` lines must not contain a datatype marker."""
        # xsd:string is the RDF 1.1 default — no annotation
        name_lines = [line for line in nquads_lines if "IdentifiedObject.name>" in line]
        assert name_lines
        assert all("^^" not in line for line in name_lines)

    def test_mrid_is_literal_not_reference(self, nquads_lines):
        """``IdentifiedObject.mRID`` objects are quoted literals, not IRIs."""
        # mRID is a string attribute by schema; the UUID heuristic must not turn it into a urn:uuid reference
        mrid_lines = [line for line in nquads_lines if "IdentifiedObject.mRID>" in line]
        assert mrid_lines
        for line in mrid_lines:
            # Third piece = object + graph part after subject and predicate.
            pieces = line.split("> ", 2)
            obj = pieces[2]
            assert obj.startswith('"'), line

    def test_without_schema_no_datatypes(self, svedala_eq, tmp_path):
        """Exporting without ``rdf_map`` emits no ``^^<`` datatype annotations."""
        output = str(tmp_path / "untyped.nq")
        triplets.export.export_to_nquads(svedala_eq, output)
        with open(output) as handle:
            content = handle.read()
        assert "^^<" not in content

    def test_polars_engine_matches_pandas(self, svedala_eq, tmp_path, nquads_lines):
        """A polars-frame export yields the same lines (order-insensitive)."""
        pl = pytest.importorskip("polars")
        from triplets.export_schema import schemas

        output = str(tmp_path / "typed_pl.nq")
        polars_frame = pl.from_pandas(svedala_eq)
        triplets.export.export_to_nquads(
            polars_frame,
            output,
            rdf_map=schemas.ENTSOE_CGMES_3_0_0_552_ED1,
        )
        with open(output) as handle:
            pl_lines = list(handle)
        assert sorted(pl_lines) == sorted(nquads_lines)


# ── Roundtrip test (export CIM XML → reimport → compare) ────────────────────

class TestCimxmlRoundtrip:
Expand Down
9 changes: 5 additions & 4 deletions triplets/export/nquads_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ def export_to_nquads(data, path, rdf_map=None):
path : str
Output file path (.nq).
rdf_map : dict or str, optional
Export schema for proper enum/association detection.
If None, enumerations won't get namespace (exported as literals).
Export schema for proper enum/association detection and literal
datatype annotations ("400"^^<...XMLSchema#float>). If None,
enumerations won't get namespace and literals stay untyped.
"""
enum_keys, key_namespaces = build_key_metadata(rdf_map) if rdf_map else (set(), {})
enum_keys, key_namespaces, key_datatypes = build_key_metadata(rdf_map) if rdf_map else (set(), {}, {})

id_col = data["ID"].astype(str)
key_col = data["KEY"].astype(str)
Expand All @@ -31,7 +32,7 @@ def export_to_nquads(data, path, rdf_map=None):
subjects = id_col.apply(make_subject)
predicates = key_col.apply(lambda k: make_predicate(k, key_namespaces))
objects = pandas.Series(
[make_object(k, v, enum_keys) for k, v in zip(key_col, val_col)],
[make_object(k, v, enum_keys, key_datatypes) for k, v in zip(key_col, val_col)],
index=data.index,
)
graphs = inst_col.apply(make_graph)
Expand Down
7 changes: 4 additions & 3 deletions triplets/export/nquads_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@ def export_to_nquads(data, path, rdf_map=None):
path : str
Output file path (.nq).
rdf_map : dict or str, optional
Export schema for proper enum/association detection.
Export schema for proper enum/association detection and literal
datatype annotations ("400"^^<...XMLSchema#float>).
"""
enum_keys, key_namespaces = build_key_metadata(rdf_map) if rdf_map else (set(), {})
enum_keys, key_namespaces, key_datatypes = build_key_metadata(rdf_map) if rdf_map else (set(), {}, {})

# Build quads row by row (complex classification can't be fully vectorized)
quads = []
for row in data.iter_rows(named=True):
s = make_subject(str(row["ID"]))
p = make_predicate(str(row["KEY"]), key_namespaces)
o = make_object(str(row["KEY"]), str(row["VALUE"]), enum_keys)
o = make_object(str(row["KEY"]), str(row["VALUE"]), enum_keys, key_datatypes)
g = make_graph(str(row["INSTANCE_ID"]))
quads.append(f"{s} {p} {o} {g} .")

Expand Down
28 changes: 25 additions & 3 deletions triplets/export/nquads_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

CIM_NS = "http://iec.ch/TC57/CIM100#"
RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
XSD_NS = "http://www.w3.org/2001/XMLSchema#"

UUID_RE = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$')


def build_key_metadata(rdf_map):
"""Extract enum keys and key→namespace mapping from export schema.
"""Extract enum keys, key→namespace, and key→datatype mappings from export schema.

Parameters
----------
Expand All @@ -26,13 +27,19 @@ def build_key_metadata(rdf_map):
KEY names whose values are enumerations (need namespace on VALUE).
key_namespaces : dict
KEY name → namespace URI for predicate construction.
key_datatypes : dict
KEY name → full xsd datatype URI (from the schema's "xsd:type",
e.g. "xsd:float" → "http://www.w3.org/2001/XMLSchema#float").
A key present here is a literal attribute by schema. xsd:string
keys map to None: literal, but no annotation (RDF 1.1 default).
"""
if not isinstance(rdf_map, dict):
with open(str(rdf_map)) as f:
rdf_map = json.load(f)

enum_keys = set()
key_namespaces = {}
key_datatypes = {}

for profile_name, profile_data in rdf_map.items():
if not isinstance(profile_data, dict):
Expand All @@ -42,13 +49,19 @@ def build_key_metadata(rdf_map):
continue
prop_type = prop_data.get("type")
namespace = prop_data.get("namespace", CIM_NS)
xsd_type = prop_data.get("xsd:type")

if prop_type == "Enumeration":
enum_keys.add(prop_name)
if namespace:
key_namespaces[prop_name] = namespace
if xsd_type and xsd_type.startswith("xsd:"):
datatype = xsd_type.removeprefix("xsd:")
if datatype == "anyURI":
continue # references (e.g. Model.DependentOn) — keep IRI handling
key_datatypes[prop_name] = None if datatype == "string" else f"{XSD_NS}{datatype}"

return enum_keys, key_namespaces
return enum_keys, key_namespaces, key_datatypes


def make_subject(id_val):
Expand All @@ -68,13 +81,16 @@ def make_predicate(key, key_namespaces=None):
return f"<{ns}{key}>"


def make_object(key, value, enum_keys=None):
def make_object(key, value, enum_keys=None, key_datatypes=None):
"""Convert VALUE to object (URI or literal).

Rules:
- Type row → <namespace#ClassName>
- Already starts with http/https/urn → <value> (pass through)
- Enum KEY → <namespace#EnumValue>
- KEY with schema datatype → "literal"^^<xsd type> (plain for xsd:string);
takes precedence over the UUID heuristic (e.g. IdentifiedObject.mRID is
a string attribute, not a reference)
- UUID pattern → <urn:uuid:value>
- Everything else → "literal" (with escaping)
"""
Expand All @@ -91,6 +107,12 @@ def make_object(key, value, enum_keys=None):
if enum_keys and key in enum_keys:
return f"<{CIM_NS}{value}>"

# Literal attribute by schema — annotate with its xsd datatype
if key_datatypes and key in key_datatypes:
escaped = value.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
datatype = key_datatypes[key]
return f'"{escaped}"^^<{datatype}>' if datatype else f'"{escaped}"'

# UUID reference
if UUID_RE.match(value):
return f"<urn:uuid:{value}>"
Expand Down
Loading