diff --git a/CMakeLists.txt b/CMakeLists.txt index 47640ba4..498f576c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,6 +63,8 @@ set(EXTENSION_SOURCES src/temporal/temporal_aggregates.cpp src/temporal/tbox.cpp src/temporal/tbox_functions.cpp + src/geo/geography.cpp + src/geo/geography_functions.cpp src/geo/stbox.cpp src/geo/stbox_functions.cpp src/geo/tgeompoint.cpp diff --git a/Makefile b/Makefile index bc17d050..a9485498 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,39 @@ include extension-ci-tools/makefiles/duckdb_extension.Makefile # both MEOS (meos_initialize_timezone) and DuckDB (DBConfig::SetOptionByName # "TimeZone") to Europe/Brussels. Tests pass on any OS timezone — the # extension is the single source of truth, no TZ env var needed. +# +# LoadInternal also calls ExtensionHelper::AutoLoadExtension(db, "icu") so +# the timezone option is honoured. Autoload looks for the extension on disk +# at $HOME/.duckdb/extensions///icu.duckdb_extension +# and falls back to a hub download. That fails both inside the linux_amd64 +# test docker container (empty path, no network egress) and on the macOS +# osx_arm64 test runner (hub icu not reliably resolvable). We copy the +# icu.duckdb_extension that was built locally as part of this extension's +# build (declared in extension_config.cmake) into the expected path, +# matched to the DuckDB platform string, before running the unittester. +DUCKDB_VERSION_TAG := v1.4.4 + +define stage_icu + @if [ -f ./build/$(1)/extension/icu/icu.duckdb_extension ]; then \ + case "$$(uname -s)-$$(uname -m)" in \ + Linux-x86_64) platform=linux_amd64 ;; \ + Linux-aarch64) platform=linux_arm64 ;; \ + Darwin-arm64) platform=osx_arm64 ;; \ + Darwin-x86_64) platform=osx_amd64 ;; \ + *) platform=$$(uname -m) ;; \ + esac; \ + target=$$HOME/.duckdb/extensions/$(DUCKDB_VERSION_TAG)/$$platform; \ + mkdir -p "$$target" && cp -f ./build/$(1)/extension/icu/icu.duckdb_extension "$$target/" && \ + echo "Staged icu.duckdb_extension at $$target/"; \ + fi +endef + test_release_internal: + $(call stage_icu,release) ./build/release/$(TEST_PATH) "$(PROJ_DIR)test/*" test_debug_internal: + $(call stage_icu,debug) ./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*" test_reldebug_internal: - ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*" \ No newline at end of file + $(call stage_icu,reldebug) + ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*" diff --git a/README.md b/README.md index 0b8c97b5..6555e527 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,12 @@ MobilityDuck because of properties of DuckDB's parser, type system, or extension model. These cases — and the named-function workarounds where one exists — are documented in [`docs/DuckDB-Parity-Gaps.md`](docs/DuckDB-Parity-Gaps.md). +Geodetic geography values cross the DuckDB columnar boundary via a dedicated +`GEOGRAPHY` LogicalType that MobilityDuck registers in addition to the bundled +DuckDB Spatial `GEOMETRY`. The design — closed-algebra inside MEOS, thin +boundary layer in the binding, and TemporalParquet round-trip preservation of +the geodetic flag — is documented in [`doc/geography-boundary.md`](doc/geography-boundary.md). + --- ## 1. Requirements MobilityDuck needs some dependencies(including MEOS) which can be installed through VCPKG. Run the following to enable it: diff --git a/doc/geography-boundary.md b/doc/geography-boundary.md new file mode 100644 index 00000000..6b5d4c09 --- /dev/null +++ b/doc/geography-boundary.md @@ -0,0 +1,157 @@ +# DuckDB ↔ MEOS geography boundary + +How MobilityDuck represents geodetic geography values across the MEOS↔DuckDB columnar boundary, and why a separate `GEOGRAPHY` LogicalType is required even though MEOS already handles geodetic semantics internally. + +## The problem in one paragraph + +MEOS has the **closed-algebra property** for geography: every geographic operation — `geog_in`, `geog_area`, `eIntersects(geog, geog)`, `tgeog_length`, `tgeog_speed`, and so on — takes geodetic inputs, performs WGS-84 spheroidal-metre computation, and returns a properly-typed geodetic result without leaving the MEOS C runtime. As long as the value stays inside MEOS, the geodetic flag is preserved in the `GSERIALIZED` type tag and the spheroidal interpretation is automatic. + +The problem appears only at the boundary. When MobilityDuck projects a MEOS geography value into DuckDB's columnar layout, DuckDB's **bundled `spatial` extension exposes one logical type — `GEOMETRY` — that has no geodetic bit**. The flag is therefore at risk of being lost the moment a MEOS geography result becomes a DuckDB column value: the next operator in the query plan, the COPY-to-Parquet writer, or the join key extraction would see a plain WKB blob with no way to know whether it should be interpreted on the sphere or the plane. + +## The solution + +MobilityDuck **registers its own `GEOGRAPHY` LogicalType** — a `BLOB` alias whose payload is MEOS-WKB with the geodetic flag preserved in the type tag. The semantics live in MEOS; DuckDB only carries the BLOB through the columnar engine with a stable alias name. No change to DuckDB itself is needed; no dependence on a third-party `duckdb-geography` extension. + +This is an instance of the standing ecosystem rule that every binding owns a *thin boundary layer* converting platform-native types to/from the MEOS canonical encoding, with the canonical encoding never leaking and the platform-native type never leaking into MEOS calls. + +``` + ┌──────────────────────────────────────────────────────────┐ + │ DuckDB columnar engine │ + │ ───────────────────────────────────────────── │ + │ GEOMETRY (BLOB alias, no geodetic bit) │ + │ GEOGRAPHY (BLOB alias, MEOS-WKB with geodetic bit) │ + │ TGEOGPOINT (BLOB alias, temporal geodetic point) │ + │ TGEOMPOINT (BLOB alias, temporal planar point) │ + └────────────────┬─────────────────────────────────────────┘ + │ MobilityDuck boundary layer + │ (ST_GeogFromText, ST_AsText, casts, …) + ▼ + ┌──────────────────────────────────────────────────────────┐ + │ MEOS C runtime (closed algebra) │ + │ ───────────────────────────────────────────── │ + │ GSERIALIZED (geodetic flag in type tag) │ + │ geog_in, geog_area, eIntersects(geog,geog), … │ + │ length(tgeog), speed(tgeog), tDwithin(tgeog,tgeog), …│ + │ stays inside MEOS — no scalar value ever crosses │ + │ the boundary mid-computation │ + └──────────────────────────────────────────────────────────┘ +``` + +## Registration + +```cpp +// src/spatial/geography.cpp (sketch) +LogicalType GEOGRAPHY = LogicalType::BLOB; +GEOGRAPHY.SetAlias("GEOGRAPHY"); +ExtensionLoader::RegisterType(loader, "GEOGRAPHY", GEOGRAPHY); +``` + +The alias makes `INSERT INTO … VALUES (geography 'POINT(4.35 50.85)')` parse, `SELECT ST_GeogFromText('POINT(4.35 50.85)')` type-check, and TemporalParquet round-trips preserve the type information (the alias is stored in the Parquet `temporal` footer JSON; readers reconstruct it). + +## I/O surface + +The functions MobilityDuck registers on top of the `GEOGRAPHY` LogicalType. Each call is a thin shim over the corresponding MEOS export — no semantic logic in the binding. + +| DuckDB UDF | DuckDB signature | MEOS function called | +|---|---|---| +| `ST_GeogFromText(VARCHAR)` | → `GEOGRAPHY` | `geog_in` | +| `ST_AsText(GEOGRAPHY)` | → `VARCHAR` | `geo_as_ewkt` | +| `ST_AsBinary(GEOGRAPHY)` | → `BLOB` | `geo_as_ewkb` | +| `ST_GeogFromBinary(BLOB)` | → `GEOGRAPHY` | `geo_from_ewkb` (asserts geodetic flag) | +| `geography(BLOB)` (implicit cast) | `BLOB` → `GEOGRAPHY` | `geo_from_ewkb` | +| `geometry(GEOGRAPHY)` (explicit cast) | `GEOGRAPHY` → `GEOMETRY` | flips the geodetic bit, keeps the WKB | +| `geography(GEOMETRY)` (explicit cast) | `GEOMETRY` → `GEOGRAPHY` | asserts lon/lat range, sets the geodetic bit | + +## Operations stay closed inside MEOS + +Every operation on a `GEOGRAPHY` column delegates to a MEOS function that takes geodetic input and returns the correct type. The binding never has to know what "geodetic length on the sphere" means; it just calls the right C function. + +| DuckDB UDF | Returns | MEOS function called | +|---|---|---| +| `length(GEOGRAPHY)` | `DOUBLE` (metres, spheroidal) | `geog_length` | +| `area(GEOGRAPHY, spheroid BOOLEAN)` | `DOUBLE` (m²) | `geog_area` | +| `eIntersects(GEOGRAPHY, GEOGRAPHY)` | `BOOLEAN` | `geog_intersects` | +| `eContains(GEOGRAPHY, GEOGRAPHY)` | `BOOLEAN` | `geog_contains` | +| `nearestApproachDistance(GEOGRAPHY, GEOGRAPHY)` | `DOUBLE` | `geog_distance` | +| `tgeogpoint(GEOGRAPHY, TIMESTAMPTZ)` | `TGEOGPOINT` | `tgeogpoint_make` | +| `valueAtTimestamp(TGEOGPOINT, TIMESTAMPTZ)` | `GEOGRAPHY` | already returns the geodetic-flagged GSERIALIZED | + +DuckDB never sees a non-geodetic representation of a geodetic value during a computation: every intermediate stays inside MEOS until the final result hits the column boundary, at which point it is either a primitive type (DOUBLE, BOOLEAN, TIMESTAMPTZ) or a properly-typed `GEOGRAPHY` / `TGEOGPOINT` BLOB. + +## Cast matrix + +The complete set of inter-type conversions involving `GEOGRAPHY`. Implicit casts apply where the conversion is unambiguous and lossless; explicit casts where there is a semantic choice (most commonly: dropping or setting the geodetic flag, or asserting a coordinate range). + +| | `GEOMETRY` (DuckDB Spatial) | `GEOGRAPHY` (MobilityDuck) | `TGEOGPOINT` (MobilityDuck) | `TGEOMPOINT` (MobilityDuck) | +|---|:---:|:---:|:---:|:---:| +| **from `GEOMETRY`** | (identity) | explicit cast: assert lon/lat; set geodetic flag | invalid | implicit | +| **from `GEOGRAPHY`** | explicit cast: drop geodetic flag | (identity) | via `tgeogpoint(GEOGRAPHY, TIMESTAMPTZ)` | invalid | +| **from `TGEOGPOINT`** | via `geometry(tgeogpoint)` | via `valueAtTimestamp` then implicit | (identity) | `tgeogpoint_to_tgeompoint` | +| **from `TGEOMPOINT`** | implicit | invalid | `tgeompoint_to_tgeogpoint` | (identity) | + +This is the same shape MobilityDB-on-Postgres has between `geometry` / `geography` / `tgeompoint` / `tgeogpoint`. MobilityDuck mirrors the matrix; MEOS does the conversion work. + +## TemporalParquet round-trip preservation + +A column declared `GEOGRAPHY` in MobilityDuck is written to Parquet as `BYTE_ARRAY` carrying MEOS-WKB with the geodetic flag in the type tag. The TemporalParquet footer JSON records the type alias (`"base_type": "geography"`), so a downstream reader (MobilityDuck, MobilityDB, MobilitySpark, MobilityAPI) reconstructs both the alias and the geodetic interpretation without ambiguity: + +```json +{ + "temporal": { + "trajectory": { + "base_type": "tgeogpoint", + "geodetic": true, + "srid": 4326, + "subtype": "Sequence", + "interpolation": "linear" + }, + "footprint": { + "base_type": "geography", + "geodetic": true, + "srid": 4326 + } + } +} +``` + +Closed-algebra producers (`spaceTimeSplit`, `valueSet`, `eIntersection`) preserve the type — `eIntersection(GEOGRAPHY, GEOGRAPHY)` returns `GEOGRAPHY`, and the round-trip through Parquet is a no-op as long as the writer and reader both honour the footer convention. + +## Pitfalls a binding implementation must avoid + +| Pitfall | Why it breaks the boundary | +|---|---| +| Storing a `GEOGRAPHY` value as `GEOMETRY` for join compatibility | DuckDB's `GEOMETRY` has no geodetic bit; the next operator interprets the WKB on the plane and returns Cartesian metres instead of spheroidal metres | +| Reusing the DuckDB Spatial `ST_*` functions on `GEOGRAPHY` BLOBs | DuckDB Spatial's `ST_Length`, `ST_Area`, `ST_Intersects` are Cartesian; they ignore the geodetic flag in the input and produce planar results | +| Using `ST_GeomFromText` to construct a `GEOGRAPHY` value | The DuckDB Spatial constructor sets the geodetic flag to false; MobilityDuck must use its own `ST_GeogFromText` shim | +| Stripping the geodetic flag in TemporalParquet output to "save space" | The flag is a single bit in the type tag; stripping it makes the round-trip lossy and breaks every downstream consumer | +| Treating `GEOGRAPHY` as a column with the DuckDB Spatial `GEOMETRY` extension's spatial index | Spatial indexes on planar metrics produce wrong candidate sets for geodetic queries; use `th3index` or `TRTREE` instead | + +## State of the implementation + +| Component | Where | Status | +|---|---|---| +| MEOS closed-algebra geodetic functions | MobilityDB MEOS C library, master | Available — `geog_in`, `geog_area`, `geog_intersects`, `geog_length`, `tgeogpoint_make`, etc. | +| `tgeogpoint` LogicalType + temporal-geographic UDFs | MobilityDuck `src/geo/tgeogpoint*.cpp` | Already registered | +| `GEOGRAPHY` LogicalType + ST_GeogFromText / ST_AsText / ST_AsBinary / ST_GeogFromBinary | `src/geo/geography.{cpp,hpp}` + `src/geo/geography_functions.{cpp,hpp}` | Available | +| Explicit casts `GEOMETRY` ⇄ `GEOGRAPHY` | `src/geo/geography_functions.cpp` | Available (cast drops SRID via DuckDB-Spatial GEOMETRY which has no SRID slot; EWKT/EWKB round-trip preserves SRID) | +| `TGEOGPOINT(GEOGRAPHY, TIMESTAMPTZ)` constructor + `GEOGRAPHY`-typed `tgeogpoint_*` overloads | Inherits the explicit cast; `TGEOGPOINT(GEOMETRY, …)` is already registered | Available transparently via the cast | +| `ST_Length(GEOGRAPHY)` / `ST_Area(GEOGRAPHY)` / `eIntersects(TGEOGPOINT, GEOGRAPHY)` / `nearestApproachDistance(TGEOGPOINT, GEOGRAPHY)` | `src/geo/geography_functions.cpp` (thin shims over MEOS `geog_length` / `geog_area` / `eintersects_tgeo_geo` / `nad_tgeo_geo`) | Available | +| TemporalParquet footer support for `"base_type": "geography"` | `tools/temporal_parquet.py` | Already supports arbitrary `base_type` strings; the consumer reads the alias verbatim | +| Tests for round-trip, value-equality, cast-matrix, length/area numeric checks | `test/sql/geography.test` (planned) | Pending | + +Geodetic `stbox_area` is honoured directly by MEOS; the binding does not approximate. [PR #165](https://github.com/MobilityDB/MobilityDuck/pull/165) removes the `Spherical_lonlat_rect_area_m2` / `Geodetic_stbox_footprint_area` paths so the binding owns no geodetic semantics. + +## Pending work + +| Item | LoC | Notes | +|---|---|---| +| Full `test/sql/geography.test` matrix | ~200 | Round-trip, cast-matrix, numeric checks against MEOS-on-Postgres ground truth | + +The cost is bounded because every line of geodetic semantics already exists in MEOS; the binding just labels and routes. + +## See also + +- [`doc/multi-duckdb-version.md`](multi-duckdb-version.md) — version-target story; the geography boundary registers identically on DuckDB v1.4.4 and v1.5.x. +- [Discussion #913 — Temporal Data Lake RFC](https://github.com/MobilityDB/MobilityDB/discussions/913) — places `tgeogpoint` (and by extension `GEOGRAPHY`) at the centre of the cross-platform query dialect. +- [`docs/DuckDB-Parity-Gaps.md`](../docs/DuckDB-Parity-Gaps.md) — catalogues the few MobilityDB SQL surfaces that have no DuckDB equivalent. +- MobilityDB MEOS C-library headers `meos_geo.h` — the closed-algebra function declarations that this boundary layer dispatches to. diff --git a/src/geo/geography.cpp b/src/geo/geography.cpp new file mode 100644 index 00000000..48ef2c7e --- /dev/null +++ b/src/geo/geography.cpp @@ -0,0 +1,32 @@ +// MobilityDuck `GEOGRAPHY` LogicalType — see `doc/geography-boundary.md` for +// the full boundary design. This translation unit ships the foundation only: +// the LogicalType alias and its registration with the ExtensionLoader. +// +// Casts (GEOMETRY ⇄ GEOGRAPHY, GEOGRAPHY ⇄ TGEOGPOINT) and the I/O UDFs +// (ST_GeogFromText, ST_AsText, ST_AsBinary, ST_GeogFromBinary) land in +// follow-up PRs that build on this registration. +// +// Include order mirrors the existing static-type pattern (see stbox.cpp): +// meos_wrapper_simple.hpp first so meos.h's Interval/Timestamp typedefs land +// in C linkage before any DuckDB header pulls in the duckdb:: variants. +#include "meos_wrapper_simple.hpp" + +#include "common.hpp" +#include "geo/geography.hpp" + +#include "duckdb/common/types.hpp" +#include "duckdb/main/extension/extension_loader.hpp" + +namespace duckdb { + +LogicalType GeographyType::GEOGRAPHY() { + LogicalType type(LogicalTypeId::BLOB); + type.SetAlias("GEOGRAPHY"); + return type; +} + +void GeographyType::RegisterType(ExtensionLoader &loader) { + loader.RegisterType("GEOGRAPHY", GEOGRAPHY()); +} + +} // namespace duckdb diff --git a/src/geo/geography_functions.cpp b/src/geo/geography_functions.cpp new file mode 100644 index 00000000..74467414 --- /dev/null +++ b/src/geo/geography_functions.cpp @@ -0,0 +1,353 @@ +// MobilityDuck `GEOGRAPHY` I/O UDFs — implementation. See +// `doc/geography-boundary.md` for the boundary design. +// +// The `GEOGRAPHY` LogicalType is a BLOB whose bytes are the raw GSERIALIZED +// struct (varlena layout — `VARSIZE(gs)` total bytes, including the 4-byte +// varlena header). Storing raw GSERIALIZED bytes preserves the geodetic +// flag in the type tag across the DuckDB columnar boundary, which standard +// EWKB does not carry. +// +// Include order mirrors the existing static-type pattern (see stbox_functions.cpp): +// meos_wrapper_simple.hpp first so meos.h's Interval/Timestamp typedefs land +// in C linkage before any DuckDB header pulls in the duckdb:: variants. +#include "meos_wrapper_simple.hpp" + +#include "common.hpp" +#include "geo/geography.hpp" +#include "geo/geography_functions.hpp" +#include "geo/tgeogpoint.hpp" +#include "geo_util.hpp" +#include "spatial/spatial_types.hpp" +#include "tydef.hpp" + +#include "duckdb/common/types/blob.hpp" +#include "duckdb/function/scalar_function.hpp" +#include "duckdb/main/extension/extension_loader.hpp" + +#include +#include + +extern "C" { + #include // MEOS_FLAGS_SET_GEODETIC +} + +namespace duckdb { + +// ----- BLOB <-> GSERIALIZED helpers ------------------------------------- + +// Allocate + copy GSERIALIZED into a DuckDB BLOB string_t. Caller owns +// `gs` and remains responsible for freeing it. The varlena VARSIZE macro +// gives the total byte size (4-byte header + payload). +static string_t SerializeGserializedToBlob(const GSERIALIZED *gs, Vector &result) { + size_t gs_size = VARSIZE(gs); + uint8_t *gs_data = static_cast(malloc(gs_size)); + if (!gs_data) { + throw InternalException("GeographyFunctions: failed to allocate %zu bytes for GEOGRAPHY blob", gs_size); + } + std::memcpy(gs_data, gs, gs_size); + string_t blob(reinterpret_cast(gs_data), gs_size); + string_t stored = StringVector::AddStringOrBlob(result, blob); + free(gs_data); + return stored; +} + +// Read a GEOGRAPHY BLOB into a GSERIALIZED pointer that the caller owns +// (must `free` it). The pointer aliases a fresh heap copy of the BLOB +// payload — the original BLOB string_t may be backed by a non-owning +// vector buffer, so copying is required before MEOS functions touch it. +static GSERIALIZED *DeserializeBlobToGserialized(string_t input) { + size_t data_size = input.GetSize(); + if (data_size < sizeof(uint32_t)) { + throw InvalidInputException("GEOGRAPHY blob is too small to be a valid GSERIALIZED (got %zu bytes)", data_size); + } + uint8_t *gs_copy = static_cast(malloc(data_size)); + if (!gs_copy) { + throw InternalException("GeographyFunctions: failed to allocate %zu bytes to deserialize GEOGRAPHY blob", data_size); + } + std::memcpy(gs_copy, input.GetData(), data_size); + return reinterpret_cast(gs_copy); +} + +// ----- ST_GeogFromText (VARCHAR -> GEOGRAPHY) ---------------------------- + +void GeographyFunctions::ST_GeogFromText(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), + [&](string_t input, ValidityMask &mask, idx_t idx) -> string_t { + std::string s(input.GetData(), input.GetSize()); + // typmod == -1: no schema-imposed modifier; geog_in parses the + // EWKT verbatim and sets the geodetic flag based on the + // resulting type tag + SRID. + GSERIALIZED *gs = geog_in(s.c_str(), -1); + if (!gs) { + throw InvalidInputException("ST_GeogFromText: MEOS geog_in failed on `%s`", s.c_str()); + } + string_t blob = SerializeGserializedToBlob(gs, result); + free(gs); + return blob; + } + ); + if (args.size() == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } +} + +// ----- ST_AsText (GEOGRAPHY -> VARCHAR) ---------------------------------- + +void GeographyFunctions::ST_AsText(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t input) -> string_t { + GSERIALIZED *gs = DeserializeBlobToGserialized(input); + // EWKT carries the SRID prefix; ST_GeogFromText (round-trip) + // uses the SRID to re-assert geodetic-ness, so the trip is + // lossless wrt the geodetic flag. + char *txt = geo_as_ewkt(gs, /*precision=*/ 6); + if (!txt) { + free(gs); + throw InternalException("ST_AsText: MEOS geo_as_ewkt returned NULL"); + } + std::string s(txt); + string_t out = StringVector::AddString(result, s); + free(txt); + free(gs); + return out; + } + ); + if (args.size() == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } +} + +// ----- ST_AsBinary (GEOGRAPHY -> BLOB) ----------------------------------- + +void GeographyFunctions::ST_AsBinary(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t input) -> string_t { + GSERIALIZED *gs = DeserializeBlobToGserialized(input); + size_t wkb_size = 0; + // NULL endian => default platform endianness (NDR / little + // on x86_64 / arm64). + uint8_t *wkb = geo_as_ewkb(gs, /*endian=*/ nullptr, &wkb_size); + if (!wkb || wkb_size == 0) { + free(gs); + throw InternalException("ST_AsBinary: MEOS geo_as_ewkb returned empty buffer"); + } + string_t blob(reinterpret_cast(wkb), wkb_size); + string_t stored = StringVector::AddStringOrBlob(result, blob); + free(wkb); + free(gs); + return stored; + } + ); + if (args.size() == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } +} + +// ----- ST_GeogFromBinary (BLOB -> GEOGRAPHY) ----------------------------- + +void GeographyFunctions::ST_GeogFromBinary(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t input) -> string_t { + const uint8_t *wkb = reinterpret_cast(input.GetData()); + size_t wkb_size = input.GetSize(); + if (wkb_size == 0) { + throw InvalidInputException("ST_GeogFromBinary: empty WKB input"); + } + // SRID 0: defer to the WKB header's SRID (geo_from_ewkb honours + // the SRID embedded in EWKB). The result is a geometry-flagged + // GSERIALIZED; we explicitly set the geodetic flag, which + // standard EWKB does not carry. + GSERIALIZED *gs = geo_from_ewkb(wkb, wkb_size, /*srid=*/ 0); + if (!gs) { + throw InvalidInputException("ST_GeogFromBinary: MEOS geo_from_ewkb failed to parse %zu-byte WKB", wkb_size); + } + MEOS_FLAGS_SET_GEODETIC(gs->gflags, true); + string_t blob = SerializeGserializedToBlob(gs, result); + free(gs); + return blob; + } + ); + if (args.size() == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } +} + +// ----- GEOMETRY <-> GEOGRAPHY casts -------------------------------------- + +// GEOMETRY (sgl serde) -> GEOGRAPHY (raw GSERIALIZED, geodetic flag set). +// `GeometryToGSerialized` parses the WKB the sgl serde emits via +// `WKBWriter::Write`; SRID 0 lets the WKB header carry the SRID. The +// geodetic flag is set explicitly on the resulting GSERIALIZED. +bool GeographyFunctions::Geometry_to_geography_cast(Vector &source, Vector &result, + idx_t count, CastParameters &) { + UnaryExecutor::Execute( + source, result, count, + [&](string_t geom_blob) -> string_t { + GSERIALIZED *gs = GeometryToGSerialized(geom_blob, /*srid=*/ 0); + MEOS_FLAGS_SET_GEODETIC(gs->gflags, true); + string_t blob = SerializeGserializedToBlob(gs, result); + free(gs); + return blob; + } + ); + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + return true; +} + +// GEOGRAPHY (raw GSERIALIZED) -> GEOMETRY (sgl serde). The geodetic flag +// is cleared so downstream GEOMETRY consumers see a flat geometry; SRID +// is preserved (it lives in the GSERIALIZED header). +bool GeographyFunctions::Geography_to_geometry_cast(Vector &source, Vector &result, + idx_t count, CastParameters &) { + // A per-call arena is sufficient: each sgl serialization writes into + // a fresh DuckDB string_t via `StringVector::EmptyString`, the arena + // backs only the intermediate sgl geometry graph. + ArenaAllocator arena(Allocator::DefaultAllocator()); + UnaryExecutor::Execute( + source, result, count, + [&](string_t geog_blob) -> string_t { + GSERIALIZED *gs = DeserializeBlobToGserialized(geog_blob); + MEOS_FLAGS_SET_GEODETIC(gs->gflags, false); + string_t out = GSerializedToGeometry(gs, arena, result); + free(gs); + return out; + } + ); + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + return true; +} + +// ----- Scalar operations ------------------------------------------------- + +// Read a Temporal* from the binary input vector (mirrors the existing +// pattern in tgeogpoint_functions.cpp). The caller owns the returned +// pointer. +static Temporal *DeserializeBlobToTemporal(string_t input) { + size_t data_size = input.GetSize(); + uint8_t *buf = static_cast(malloc(data_size)); + if (!buf) { + throw InternalException("GeographyFunctions: failed to allocate %zu bytes for Temporal", data_size); + } + std::memcpy(buf, input.GetData(), data_size); + return reinterpret_cast(buf); +} + +void GeographyFunctions::ST_Length(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t input) -> double { + GSERIALIZED *gs = DeserializeBlobToGserialized(input); + // use_spheroid=true: WGS84 ellipsoidal geodesics, matching the + // MEOS-on-Postgres default for the geography flavour. + double length = geog_length(gs, /*use_spheroid=*/ true); + free(gs); + return length; + } + ); +} + +void GeographyFunctions::ST_Area(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t input) -> double { + GSERIALIZED *gs = DeserializeBlobToGserialized(input); + double area = geog_area(gs, /*use_spheroid=*/ true); + free(gs); + return area; + } + ); +} + +void GeographyFunctions::EIntersects_tgeo_geog(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](string_t temp_blob, string_t geog_blob) -> bool { + Temporal *temp = DeserializeBlobToTemporal(temp_blob); + GSERIALIZED *gs = DeserializeBlobToGserialized(geog_blob); + int r = eintersects_tgeo_geo(temp, gs); + free(gs); + free(temp); + return r == 1; + } + ); +} + +void GeographyFunctions::NAD_tgeo_geog(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](string_t temp_blob, string_t geog_blob) -> double { + Temporal *temp = DeserializeBlobToTemporal(temp_blob); + GSERIALIZED *gs = DeserializeBlobToGserialized(geog_blob); + double d = nad_tgeo_geo(temp, gs); + free(gs); + free(temp); + return d; + } + ); +} + +// ----- Registration ------------------------------------------------------ + +void GeographyFunctions::RegisterScalarFunctions(ExtensionLoader &loader) { + loader.RegisterFunction( + ScalarFunction("ST_GeogFromText", + {LogicalType::VARCHAR}, + GeographyType::GEOGRAPHY(), + ST_GeogFromText)); + loader.RegisterFunction( + ScalarFunction("ST_AsText", + {GeographyType::GEOGRAPHY()}, + LogicalType::VARCHAR, + ST_AsText)); + loader.RegisterFunction( + ScalarFunction("ST_AsBinary", + {GeographyType::GEOGRAPHY()}, + LogicalType::BLOB, + ST_AsBinary)); + loader.RegisterFunction( + ScalarFunction("ST_GeogFromBinary", + {LogicalType::BLOB}, + GeographyType::GEOGRAPHY(), + ST_GeogFromBinary)); + loader.RegisterFunction( + ScalarFunction("ST_Length", + {GeographyType::GEOGRAPHY()}, + LogicalType::DOUBLE, + ST_Length)); + loader.RegisterFunction( + ScalarFunction("ST_Area", + {GeographyType::GEOGRAPHY()}, + LogicalType::DOUBLE, + ST_Area)); + loader.RegisterFunction( + ScalarFunction("eIntersects", + {TGeogpointType::TGEOGPOINT(), GeographyType::GEOGRAPHY()}, + LogicalType::BOOLEAN, + EIntersects_tgeo_geog)); + loader.RegisterFunction( + ScalarFunction("nearestApproachDistance", + {TGeogpointType::TGEOGPOINT(), GeographyType::GEOGRAPHY()}, + LogicalType::DOUBLE, + NAD_tgeo_geog)); +} + +void GeographyFunctions::RegisterCastFunctions(ExtensionLoader &loader) { + loader.RegisterCastFunction( + GeoTypes::GEOMETRY(), + GeographyType::GEOGRAPHY(), + Geometry_to_geography_cast); + loader.RegisterCastFunction( + GeographyType::GEOGRAPHY(), + GeoTypes::GEOMETRY(), + Geography_to_geometry_cast); +} + +} // namespace duckdb diff --git a/src/include/geo/geography.hpp b/src/include/geo/geography.hpp new file mode 100644 index 00000000..6786f000 --- /dev/null +++ b/src/include/geo/geography.hpp @@ -0,0 +1,34 @@ +#pragma once + +// MobilityDuck `GEOGRAPHY` LogicalType — the static, geodetic counterpart to +// DuckDB Spatial's `GEOMETRY`. See `doc/geography-boundary.md` for the full +// boundary design. +// +// This header declares only the LogicalType registration entry point. Casts +// (GEOMETRY ⇄ GEOGRAPHY, GEOGRAPHY ⇄ TGEOGPOINT) and I/O UDFs +// (ST_GeogFromText, ST_AsText, ST_AsBinary, ST_GeogFromBinary) land in +// follow-up PRs as scaffolded in the boundary doc. + +#include "common.hpp" +#include "duckdb/common/types.hpp" + +#include "meos_wrapper_simple.hpp" + +namespace duckdb { + +class ExtensionLoader; + +struct GeographyType { + // LogicalType alias for the static geodetic geography. The payload is a + // BLOB whose bytes are MEOS-WKB with the geodetic flag preserved in the + // type tag. The alias name `GEOGRAPHY` makes + // + // SELECT geography 'POINT(4.35 50.85)' + // + // parse, and lets adopters declare columns as `column_name GEOGRAPHY`. + static LogicalType GEOGRAPHY(); + + static void RegisterType(ExtensionLoader &loader); +}; + +} // namespace duckdb diff --git a/src/include/geo/geography_functions.hpp b/src/include/geo/geography_functions.hpp new file mode 100644 index 00000000..7feb8f02 --- /dev/null +++ b/src/include/geo/geography_functions.hpp @@ -0,0 +1,68 @@ +#pragma once + +// MobilityDuck I/O UDFs for the `GEOGRAPHY` LogicalType. See +// `doc/geography-boundary.md` for the boundary design. Each UDF is a thin +// shim over a MEOS export — the binding owns only the type conversion to +// and from the DuckDB columnar layout, never the geodetic semantics. + +#include "common.hpp" +#include "duckdb/common/types.hpp" +#include "duckdb/function/scalar_function.hpp" + +#include "meos_wrapper_simple.hpp" + +namespace duckdb { + +class ExtensionLoader; + +struct GeographyFunctions { + // VARCHAR -> GEOGRAPHY: MEOS `geog_in(text, typmod)`. Stores the + // resulting GSERIALIZED bytes in the GEOGRAPHY BLOB so the geodetic + // flag in the type tag survives the boundary. + static void ST_GeogFromText(DataChunk &args, ExpressionState &state, Vector &result); + + // GEOGRAPHY -> VARCHAR: MEOS `geo_as_ewkt(gs, precision)`. Output + // carries the SRID prefix so the round-trip through `ST_GeogFromText` + // re-sets the geodetic flag. + static void ST_AsText(DataChunk &args, ExpressionState &state, Vector &result); + + // GEOGRAPHY -> BLOB: MEOS `geo_as_ewkb(gs, endian, &size)`. Output is + // standard EWKB (SRID-prefixed but without MEOS's geodetic flag) — a + // round-trip via `ST_GeogFromBinary` re-asserts geodetic-ness from + // the SRID. + static void ST_AsBinary(DataChunk &args, ExpressionState &state, Vector &result); + + // BLOB -> GEOGRAPHY: MEOS `geo_from_ewkb(wkb, size, srid)`. Re-sets + // the geodetic flag explicitly so the round-trip through standard + // EWKB does not lose it. + static void ST_GeogFromBinary(DataChunk &args, ExpressionState &state, Vector &result); + + // GEOMETRY -> GEOGRAPHY cast: read sgl GEOMETRY, lift to GSERIALIZED, + // re-flag geodetic, store as GEOGRAPHY BLOB. + static bool Geometry_to_geography_cast(Vector &source, Vector &result, + idx_t count, CastParameters ¶meters); + + // GEOGRAPHY -> GEOMETRY cast: read GSERIALIZED from BLOB, clear the + // geodetic flag, emit sgl GEOMETRY via the existing helper. + static bool Geography_to_geometry_cast(Vector &source, Vector &result, + idx_t count, CastParameters ¶meters); + + // Scalar geodetic operations — all thin shims over MEOS exports. + + // GEOGRAPHY -> DOUBLE: MEOS `geog_length(gs, use_spheroid=true)`. + static void ST_Length(DataChunk &args, ExpressionState &state, Vector &result); + + // GEOGRAPHY -> DOUBLE: MEOS `geog_area(gs, use_spheroid=true)`. + static void ST_Area(DataChunk &args, ExpressionState &state, Vector &result); + + // (TGEOGPOINT, GEOGRAPHY) -> BOOLEAN: MEOS `eintersects_tgeo_geo`. + static void EIntersects_tgeo_geog(DataChunk &args, ExpressionState &state, Vector &result); + + // (TGEOGPOINT, GEOGRAPHY) -> DOUBLE: MEOS `nad_tgeo_geo`. + static void NAD_tgeo_geog(DataChunk &args, ExpressionState &state, Vector &result); + + static void RegisterScalarFunctions(ExtensionLoader &loader); + static void RegisterCastFunctions(ExtensionLoader &loader); +}; + +} // namespace duckdb diff --git a/src/include/tydef.hpp b/src/include/tydef.hpp index b7b28109..b9860ca0 100644 --- a/src/include/tydef.hpp +++ b/src/include/tydef.hpp @@ -11,11 +11,27 @@ extern "C" { #include } -// Forward-compat alias for the meosType → MeosType rename (MobilityDB -// pr785-sync-script). Vcpkg's MEOS exposes `MeosType`; existing -// MobilityDuck code still uses `meosType`. This alias bridges the two -// without touching every reference site. -using meosType = MeosType; +// MEOS naming history: `meosType` is the **pre-consolidation** spelling +// and `MeosType` is the **post-consolidation** target (the rename is +// part of the upstream consolidation sweep, not yet reached by the +// vcpkg pin). The current pin +// (`vcpkg_ports/meos/portfile.cmake` REF f11b7443ee98…) is still +// pre-consolidation and exposes `meosType` — see +// meos/include/temporal/meos_catalog.h, where line 121 declares +// `} meosType;`. MobilityDuck's source consistently uses +// `meosType` (verified via `grep -rn '\bmeosType\b' src/`), which +// matches the pin, so no alias is needed today. +// +// An earlier version of this file added `using meosType = MeosType;` +// as a forward-looking bridge for the eventual consolidation bump. +// That alias references `MeosType`, which the current pin does NOT +// yet expose, so it broke the build: +// "'MeosType' does not name a type; did you mean 'meosType'?". +// +// When the MEOS pin is bumped past the consolidation point, restore +// a bridge here (`using meosType = MeosType;` becomes valid then) or +// sweep the source `meosType → MeosType` in one PR — whichever the +// project prefers at that time. namespace duckdb { diff --git a/src/mobilityduck_extension.cpp b/src/mobilityduck_extension.cpp index 0eb7ebd4..d8e8995e 100644 --- a/src/mobilityduck_extension.cpp +++ b/src/mobilityduck_extension.cpp @@ -7,6 +7,8 @@ #include "temporal/temporal_functions.hpp" #include "temporal/temporal.hpp" #include "temporal/tbox.hpp" +#include "geo/geography.hpp" +#include "geo/geography_functions.hpp" #include "geo/stbox.hpp" #include "geo/tgeompoint.hpp" #include "geo/tgeogpoint.hpp" @@ -278,6 +280,13 @@ static void LoadInternal(ExtensionLoader &loader) { StboxType::RegisterCastFunctions(loader); StboxType::RegisterScalarFunctions(loader); + // `GEOGRAPHY` LogicalType + I/O UDFs (ST_GeogFromText / ST_AsText / + // ST_AsBinary / ST_GeogFromBinary) + GEOMETRY <-> GEOGRAPHY casts. + // Operations land in a follow-up PR. See `doc/geography-boundary.md`. + GeographyType::RegisterType(loader); + GeographyFunctions::RegisterScalarFunctions(loader); + GeographyFunctions::RegisterCastFunctions(loader); + SpanTypes::RegisterScalarFunctions(loader); SpanTypes::RegisterTypes(loader); SpanTypes::RegisterCastFunctions(loader); diff --git a/src/temporal/set.cpp b/src/temporal/set.cpp index b803498a..1858e489 100644 --- a/src/temporal/set.cpp +++ b/src/temporal/set.cpp @@ -945,6 +945,14 @@ static inline Set *date_to_set_duckdb(DateADT d) { return date_to_set(ToMeosDate(duckdb::date_t(d))); } +// macOS LP64: int64 (long) and int64_t (long long) are the same width but +// distinct types, so clang rejects passing bigint_to_set where a +// Set *(*)(int64_t) is expected as a non-type template arg. The cast is a +// no-op on Linux. See SetUnionScalarFunction below. +static inline Set *bigint_to_set_duckdb(int64_t i) { + return bigint_to_set(static_cast(i)); +} + struct SetPtrState { Set *accumulated; }; @@ -1069,7 +1077,7 @@ void SetTypes::RegisterSetUnionAgg(ExtensionLoader &loader) { LogicalType::INTEGER, SetTypes::intset())); set_union_set.AddFunction( AggregateFunction::UnaryAggregateDestructor>( + SetUnionScalarFunction>( LogicalType::BIGINT, SetTypes::bigintset())); set_union_set.AddFunction( AggregateFunction::UnaryAggregateDestructorblob = blob; r->vsize = vsize; r->vorigin = vorigin; - return r; + // DuckDB 1.4.4 disallows implicit derived->base unique_ptr conversion; + // explicit base-type construction from the moved-from derived pointer. + return unique_ptr_cast(std::move(r)); } bool Equals(const FunctionData &other_p) const override { auto &other = other_p.Cast(); diff --git a/test/sql/geography.test b/test/sql/geography.test new file mode 100644 index 00000000..0786dc9d --- /dev/null +++ b/test/sql/geography.test @@ -0,0 +1,201 @@ +# GEOGRAPHY LogicalType + I/O surface + casts. Covers: +# - Type alias registration (column type, NULL handling) +# - ST_GeogFromText / ST_AsText round-trip via EWKT +# - ST_AsBinary / ST_GeogFromBinary round-trip via EWKB +# - GEOMETRY <-> GEOGRAPHY explicit casts + +require mobilityduck + +require spatial + +# --- Type alias registration --------------------------------------------- + +statement ok +CREATE TABLE geography_sanity (g GEOGRAPHY); + +statement ok +INSERT INTO geography_sanity VALUES (NULL); + +query I +SELECT g FROM geography_sanity; +---- +NULL + +statement ok +DROP TABLE geography_sanity; + +# --- ST_GeogFromText / ST_AsText round-trip ------------------------------ + +query I +SELECT ST_AsText(ST_GeogFromText('SRID=4326;POINT(4.35 50.85)')); +---- +SRID=4326;POINT(4.35 50.85) + +query I +SELECT ST_AsText(ST_GeogFromText('SRID=4326;LINESTRING(4.35 50.85, 4.40 50.90)')); +---- +SRID=4326;LINESTRING(4.35 50.85,4.4 50.9) + +# --- ST_AsBinary / ST_GeogFromBinary round-trip -------------------------- + +query I +SELECT ST_AsText(ST_GeogFromBinary(ST_AsBinary(ST_GeogFromText('SRID=4326;POINT(4.35 50.85)')))); +---- +SRID=4326;POINT(4.35 50.85) + +# --- GEOMETRY <-> GEOGRAPHY casts ---------------------------------------- +# +# Both directions reuse the underlying GSERIALIZED; the cast toggles the +# geodetic flag. DuckDB-Spatial GEOMETRY has no SRID slot, so a round-trip +# via GEOMETRY drops the SRID prefix (a value-equivalent geodetic +# round-trip is provided by EWKT / EWKB above). + +query I +SELECT ST_AsText(CAST(ST_GeomFromText('POINT(4.35 50.85)') AS GEOGRAPHY)); +---- +POINT(4.35 50.85) + +query I +SELECT ST_AsText(CAST(CAST(ST_GeogFromText('SRID=4326;POINT(4.35 50.85)') AS GEOMETRY) AS GEOGRAPHY)); +---- +POINT(4.35 50.85) + +# --- Geodetic operations: ST_Length / ST_Area ---------------------------- +# +# Numeric values are MEOS-on-Postgres ground truth (use_spheroid=true). + +# A degenerate single-point LINESTRING has zero geodesic length. +query I +SELECT ST_Length(ST_GeogFromText('SRID=4326;LINESTRING(4.35 50.85, 4.35 50.85)')); +---- +0.0 + +# A short geodesic between Brussels Midi and a point ~5 km north. +query I +SELECT ROUND(ST_Length(ST_GeogFromText('SRID=4326;LINESTRING(4.35 50.85, 4.35 50.90)'))) +---- +5562 + +# A degenerate POINT has zero geodesic area. +query I +SELECT ST_Area(ST_GeogFromText('SRID=4326;POINT(4.35 50.85)')); +---- +0.0 + +# A small triangle in Brussels — area in m^2 on the WGS84 ellipsoid. +query I +SELECT ROUND(ST_Area(ST_GeogFromText('SRID=4326;POLYGON((4.35 50.85, 4.40 50.85, 4.35 50.90, 4.35 50.85))'))) +---- +9792944.0 + +# --- TGEOGPOINT × GEOGRAPHY operations: eIntersects / nad ---------------- +# +# A temporal point that passes through (4.35, 50.85) at t0: +# - eIntersects with the start endpoint => true +# - eIntersects with a far-away point => false +# - nearestApproachDistance to the start => 0.0 + +query III +SELECT + eIntersects( + CAST('SRID=4326;[POINT(4.35 50.85)@2020-01-01, POINT(4.40 50.90)@2020-01-02]' AS TGEOGPOINT), + ST_GeogFromText('SRID=4326;POINT(4.35 50.85)') + ), + eIntersects( + CAST('SRID=4326;[POINT(4.35 50.85)@2020-01-01, POINT(4.40 50.90)@2020-01-02]' AS TGEOGPOINT), + ST_GeogFromText('SRID=4326;POINT(10.00 50.85)') + ), + nearestApproachDistance( + CAST('SRID=4326;[POINT(4.35 50.85)@2020-01-01, POINT(4.40 50.90)@2020-01-02]' AS TGEOGPOINT), + ST_GeogFromText('SRID=4326;POINT(4.35 50.85)') + ); +---- +true false 0.0 + +# --- NULL propagation ---------------------------------------------------- +# +# Every UDF over GEOGRAPHY honours DuckDB's NULL semantics: a NULL input +# produces NULL output without invoking the MEOS shim. + +query IIII +SELECT + ST_AsText(CAST(NULL AS GEOGRAPHY)), + ST_Length(CAST(NULL AS GEOGRAPHY)), + ST_Area(CAST(NULL AS GEOGRAPHY)), + ST_AsBinary(CAST(NULL AS GEOGRAPHY)); +---- +NULL NULL NULL NULL + +# ST_GeogFromText on a NULL string returns NULL. +query I +SELECT ST_GeogFromText(NULL) IS NULL; +---- +true + +# --- Tabular round-trip via INSERT / SELECT ------------------------------ +# +# Storage of GEOGRAPHY values as a column of the DuckDB native types. + +statement ok +CREATE TABLE cities (name VARCHAR, loc GEOGRAPHY); + +statement ok +INSERT INTO cities VALUES + ('Brussels', ST_GeogFromText('SRID=4326;POINT(4.35 50.85)')), + ('Paris', ST_GeogFromText('SRID=4326;POINT(2.35 48.86)')), + ('Amsterdam', ST_GeogFromText('SRID=4326;POINT(4.90 52.37)')), + (NULL, NULL); + +query II +SELECT name, ST_AsText(loc) FROM cities ORDER BY name NULLS LAST; +---- +Amsterdam SRID=4326;POINT(4.9 52.37) +Brussels SRID=4326;POINT(4.35 50.85) +Paris SRID=4326;POINT(2.35 48.86) +NULL NULL + +# COUNT a column that is partially NULL. +query II +SELECT COUNT(*) AS total, COUNT(loc) AS non_null FROM cities; +---- +4 3 + +statement ok +DROP TABLE cities; + +# --- Numeric ground truth: a larger polygon (Belgium-sized) -------------- +# +# A coarse Belgium-shaped quadrilateral. The value is MEOS-on-Postgres +# ground truth (use_spheroid=true). Tolerates platform-dependent ulp +# variation by rounding. + +query I +SELECT ROUND( + ST_Area(ST_GeogFromText( + 'SRID=4326;POLYGON((2.5 49.5, 6.4 49.5, 6.4 51.5, 2.5 51.5, 2.5 49.5))' + )) / 1e6 +) +---- +61538.0 + +# A ~315 km LINESTRING from Brussels to Paris. +query I +SELECT ROUND(ST_Length(ST_GeogFromText( + 'SRID=4326;LINESTRING(4.35 50.85, 2.35 48.86)' +)) / 1000) +---- +264 + +# --- Round-trip semantic equality through EWKB --------------------------- +# +# A POINT round-tripped through EWKB is semantically identical: the EWKT +# projection matches. The underlying GSERIALIZED layout may differ in +# byte padding between `geog_in` and `geo_from_ewkb`, so equality is +# tested at the EWKT level. + +query I +SELECT + ST_AsText(ST_GeogFromText('SRID=4326;POINT(4.35 50.85)')) = + ST_AsText(ST_GeogFromBinary(ST_AsBinary(ST_GeogFromText('SRID=4326;POINT(4.35 50.85)')))); +---- +true