Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,39 @@ include extension-ci-tools/makefiles/duckdb_extension.Makefile
# both MEOS (meos_initialize_timezone) and DuckDB (DBConfig::SetOptionByName
# "TimeZone") to Europe/Brussels. Tests pass on any OS timezone — the
# extension is the single source of truth, no TZ env var needed.
#
# LoadInternal also calls ExtensionHelper::AutoLoadExtension(db, "icu") so
# the timezone option is honoured. Autoload looks for the extension on disk
# at $HOME/.duckdb/extensions/<duckdb_version>/<platform>/icu.duckdb_extension
# and falls back to a hub download. That fails both inside the linux_amd64
# test docker container (empty path, no network egress) and on the macOS
# osx_arm64 test runner (hub icu not reliably resolvable). We copy the
# icu.duckdb_extension that was built locally as part of this extension's
# build (declared in extension_config.cmake) into the expected path,
# matched to the DuckDB platform string, before running the unittester.
DUCKDB_VERSION_TAG := v1.4.4

define stage_icu
@if [ -f ./build/$(1)/extension/icu/icu.duckdb_extension ]; then \
case "$$(uname -s)-$$(uname -m)" in \
Linux-x86_64) platform=linux_amd64 ;; \
Linux-aarch64) platform=linux_arm64 ;; \
Darwin-arm64) platform=osx_arm64 ;; \
Darwin-x86_64) platform=osx_amd64 ;; \
*) platform=$$(uname -m) ;; \
esac; \
target=$$HOME/.duckdb/extensions/$(DUCKDB_VERSION_TAG)/$$platform; \
mkdir -p "$$target" && cp -f ./build/$(1)/extension/icu/icu.duckdb_extension "$$target/" && \
echo "Staged icu.duckdb_extension at $$target/"; \
fi
endef

test_release_internal:
$(call stage_icu,release)
./build/release/$(TEST_PATH) "$(PROJ_DIR)test/*"
test_debug_internal:
$(call stage_icu,debug)
./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*"
test_reldebug_internal:
./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*"
$(call stage_icu,reldebug)
./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*"
5 changes: 5 additions & 0 deletions src/include/index/rtree_module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ class TRTreeIndex : public BoundIndex {
MeosType bbox_meostype;
size_t bbox_size_;
LogicalType column_type_;
// Multi-entry (MEST) split bound for temporal columns: each temporal
// value is indexed as up to this many tight per-segment bounding
// boxes. <= 1 degenerates to the single minimum bounding box (the
// pre-MEST behaviour). Tunable via WITH (max_boxes = N) on the index.
int max_boxes_ = 8;

size_t current_size_ = 0;
size_t current_capacity_ = 0;
Expand Down
26 changes: 21 additions & 5 deletions src/include/tydef.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,27 @@ extern "C" {
#include <meos_internal.h>
}

// Forward-compat alias for the meosType → MeosType rename (MobilityDB
// pr785-sync-script). Vcpkg's MEOS exposes `MeosType`; existing
// MobilityDuck code still uses `meosType`. This alias bridges the two
// without touching every reference site.
using meosType = MeosType;
// MEOS naming history: `meosType` is the **pre-consolidation** spelling
// and `MeosType` is the **post-consolidation** target (the rename is
// part of the upstream consolidation sweep, not yet reached by the
// vcpkg pin). The current pin
// (`vcpkg_ports/meos/portfile.cmake` REF f11b7443ee98…) is still
// pre-consolidation and exposes `meosType` — see
// meos/include/temporal/meos_catalog.h, where line 121 declares
// `} meosType;`. MobilityDuck's source consistently uses
// `meosType` (verified via `grep -rn '\bmeosType\b' src/`), which
// matches the pin, so no alias is needed today.
//
// An earlier version of this file added `using meosType = MeosType;`
// as a forward-looking bridge for the eventual consolidation bump.
// That alias references `MeosType`, which the current pin does NOT
// yet expose, so it broke the build:
// "'MeosType' does not name a type; did you mean 'meosType'?".
//
// When the MEOS pin is bumped past the consolidation point, restore
// a bridge here (`using meosType = MeosType;` becomes valid then) or
// sweep the source `meosType → MeosType` in one PR — whichever the
// project prefers at that time.

namespace duckdb {

Expand Down
21 changes: 20 additions & 1 deletion src/index/rtree_index_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@ BindInfo TRTreeIndexScanBindInfo(const optional_ptr<FunctionData> bind_data_p) {
struct RTreeIndexScanGlobalState : public GlobalTableFunctionState {
DataChunk all_columns;
vector<idx_t> projection_ids;
vector<LogicalType> scanned_types;
ColumnFetchState fetch_state;
TableScanState local_storage_state;
vector<StorageIndex> column_ids;

unique_ptr<IndexScanState> index_state;
Vector row_ids = Vector(LogicalType::ROW_TYPE);
// rowid is BIGINT. Use the LogicalTypeId enumerator, never the
// LogicalType::ROW_TYPE static const member: ODR-using it from this
// extension TU emits a second definition that clashes with libduckdb
// ("multiple definition of duckdb::LogicalType::ROW_TYPE" at link).
Vector row_ids = Vector(LogicalType(LogicalTypeId::BIGINT));
};

static unique_ptr<GlobalTableFunctionState> RTreeIndexScanInitGlobal(ClientContext &context,
Expand All @@ -50,10 +55,24 @@ static unique_ptr<GlobalTableFunctionState> RTreeIndexScanInitGlobal(ClientConte
storage_t col_id = id;
if (id != DConstants::INVALID_INDEX) {
col_id = bind_data.table.GetColumn(LogicalIndex(id)).StorageOid();
result->scanned_types.push_back(bind_data.table.GetColumn(LogicalIndex(id)).Type());
} else {
// rowid column: BIGINT (see row_ids note re: ROW_TYPE ODR clash)
result->scanned_types.emplace_back(LogicalType(LogicalTypeId::BIGINT));
}
result->column_ids.emplace_back(col_id);
}

// Honour projection pushdown exactly like DuckDB's table_scan: when the
// optimizer removes filter-only columns, fetch the full scanned set into
// all_columns (which MUST be initialized with the scanned column types,
// else Fetch hits "Expected vector of type X, found Y") and reference
// the projected subset out of it.
if (input.CanRemoveFilterColumns()) {
result->projection_ids = input.projection_ids;
result->all_columns.Initialize(context, result->scanned_types);
}

// Initialize the storage scan state
result->local_storage_state.Initialize(result->column_ids, context, input.filters);
local_storage.InitializeScan(bind_data.table.GetStorage(), result->local_storage_state.local_state, input.filters);
Expand Down
99 changes: 41 additions & 58 deletions src/index/rtree_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,14 @@ TRTreeIndex::TRTreeIndex(const string &name, IndexConstraintType constraint_type
if (!rtree_) {
throw InternalException("Failed to create MEOS RTree");
}


// MEST split bound: WITH (max_boxes = N). Default 8; N <= 1 reproduces
// the pre-MEST single-box index. Only affects temporal columns.
auto mb_it = options_.find("max_boxes");
if (mb_it != options_.end() && !mb_it->second.IsNull()) {
max_boxes_ = mb_it->second.GetValue<int32_t>();
}

function_matcher = MakeFunctionMatcher();
}

Expand Down Expand Up @@ -222,16 +229,11 @@ ErrorData TRTreeIndex::Insert(IndexLock &lock, DataChunk &data, Vector &row_ids)
box = (STBox*)malloc(stbox_size);

memcpy(box, stbox_data, stbox_size);

int32_t box_srid = stbox_srid(box);
if (box_srid != 0) {
STBox *normalized_box = stbox_set_srid(box, 0);
if (normalized_box) {
free(box);
box = normalized_box;
}
}
}
// No SRID normalisation: index keys keep their natural SRID
// and && requires matching SRID, mirroring MobilityDB/PostGIS
// GiST. Normalising here but not at search time (or vice
// versa) makes ensure_same_srid reject every overlap.
}
else {
continue;
}
Expand Down Expand Up @@ -312,26 +314,18 @@ void TRTreeIndex::Construct(DataChunk &expression_result, Vector &row_identifier

if (indexes_temporal) {
const Temporal *temp = reinterpret_cast<const Temporal *>(data);
// For temporal-spatial types extract the bbox and strip the
// SRID so index keys agree with the SRID-stripped query box
// used at search time (InitializeScan strips it too).
if (bbox_meostype == T_STBOX) {
STBox *box = tspatial_to_stbox(temp);
if (!box) {
continue;
}
if (stbox_srid(box) != 0) {
STBox *normalized = stbox_set_srid(box, 0);
if (normalized) {
free(box);
box = normalized;
}
}
rtree_insert(rtree_, box, static_cast<int>(row_data[i]));
free(box);
} else {
rtree_insert_temporal(rtree_, temp, static_cast<int>(row_data[i]));
}
const int id = static_cast<int>(row_data[i]);
// Multi-entry (MEST): index the temporal as up to max_boxes_
// tight per-segment bounding boxes sharing this id; the
// splitter degenerates to a single minimum bounding box for
// instants or max_boxes_ <= 1, byte-identical to the pre-MEST
// single-box path. The produced boxes carry the temporal's
// own SRID, exactly like rtree_insert_temporal /
// tspatial_to_stbox. Do NOT pre-normalise the SRID:
// tspatial_set_srid(temp, 0) makes the SRID "unknown" and the
// stbox conversion inside the splitter then raises
// "The SRID cannot be unknown" at index-build time.
rtree_insert_temporal_split(rtree_, temp, id, max_boxes_);
continue;
}

Expand All @@ -342,18 +336,9 @@ void TRTreeIndex::Construct(DataChunk &expression_result, Vector &row_identifier

void *box = malloc(data_size);
memcpy(box, data, data_size);

if (bbox_meostype == T_STBOX) {
STBox *stbox = (STBox *) box;
int32_t box_srid = stbox_srid(stbox);
if (box_srid != 0) {
STBox *normalized_box = stbox_set_srid(stbox, 0);
if (normalized_box) {
free(box);
box = normalized_box;
}
}
}
// No SRID normalisation (see Construct/InitializeScan): keep the
// natural SRID so index keys and the query box agree; && requires
// matching SRID, mirroring MobilityDB/PostGIS GiST.

void *target = (char *) boxes + (i * bbox_size_);
memcpy(target, box, bbox_size_);
Expand Down Expand Up @@ -417,21 +402,10 @@ unique_ptr<IndexScanState> TRTreeIndex::InitializeScan(const void* query_blob, s

state->query_box = malloc(blob_size);
memcpy(state->query_box, data, blob_size);
// No SRID normalisation: the query box keeps its natural SRID so
// it matches the index keys (which also keep theirs); && requires
// matching SRID, mirroring MobilityDB/PostGIS GiST.

if (bbox_meostype == T_STBOX) {
STBox *stbox = (STBox*)state->query_box;
int32_t query_srid = stbox_srid(stbox);
if (query_srid != 0) {
STBox *normalized_query = stbox_set_srid(stbox, 0);
if (normalized_query) {
free(state->query_box);
state->query_box = malloc(blob_size);
memcpy(state->query_box, normalized_query, blob_size);
free(normalized_query);
}
}
}

} else {
throw InvalidInputException("Unsupported R-Tree operation: " + operation +
" for bbox_type: " + std::to_string(bbox_meostype));
Expand Down Expand Up @@ -486,9 +460,18 @@ vector<row_t> TRTreeIndex::Search(const void *query_box, RTreeSearchOp op) const

if (count > 0) {
results.reserve(count);
// Multi-entry (MEST) leaves return the same id once per
// overlapping per-segment box. Emit each row exactly once;
// duplicates would otherwise surface as duplicate rows in the
// index scan. Harmless no-op for single-box indexes.
unordered_set<row_t> seen;
seen.reserve(count);
for (int i = 0; i < count; i++) {
int *id = static_cast<int *>(meos_array_get(ids, i));
results.push_back(static_cast<row_t>(*id));
row_t rid = static_cast<row_t>(*id);
if (seen.insert(rid).second) {
results.push_back(rid);
}
}
}
} catch (...) {
Expand Down
77 changes: 77 additions & 0 deletions test/sql/parity/050_index_types.test
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,80 @@ statement error
CREATE INDEX i_unsupported ON idx_unsupported USING TRTREE (x);
----
TRTREE index supports

# =============================================================================
# Multi-entry (MEST) indexing on temporal columns.
#
# A temporal column is indexed as up to max_boxes tight per-segment
# bounding boxes per row. This must (a) never produce false negatives,
# (b) return each row exactly once even when many of its per-segment
# boxes overlap the query (dedup), and (c) behave identically for the
# default split, an explicit max_boxes, and the degenerate max_boxes = 1
# single-box index.
# =============================================================================

statement ok
CREATE TABLE idx_mest(t tgeompoint);

# trip 1: a wiggly zig-zag with a large X-extent (many tight segments);
# trip 2: a small trip far away.
statement ok
INSERT INTO idx_mest VALUES
('[Point(0 0)@2000-01-01, Point(10 0)@2000-01-02, Point(0 0)@2000-01-03, Point(10 0)@2000-01-04, Point(0 0)@2000-01-05]'::tgeompoint),
('[Point(1000 1000)@2000-01-01, Point(1001 1001)@2000-01-02]'::tgeompoint);

statement ok
CREATE INDEX i_mest ON idx_mest USING TRTREE (t);

# Query box covers the whole zig-zag and therefore overlaps many of
# trip 1's per-segment boxes: it must still come back exactly once.
query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox;
----
1

# Disjoint from both trips: a true negative for single-box and MEST alike.
query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((100,100),(200,200))'::stbox;
----
0

# Near trip 2 only.
query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((999,999),(1002,1002))'::stbox;
----
1

statement ok
DROP INDEX i_mest;

# Explicit, tighter split: same correct results, no false negatives.
statement ok
CREATE INDEX i_mest ON idx_mest USING TRTREE (t) WITH (max_boxes = 16);

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox;
----
1

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((100,100),(200,200))'::stbox;
----
0

statement ok
DROP INDEX i_mest;

# Degenerate single-box index (pre-MEST behaviour) via the option.
statement ok
CREATE INDEX i_mest ON idx_mest USING TRTREE (t) WITH (max_boxes = 1);

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((-1,-1),(11,11))'::stbox;
----
1

query I
SELECT count(*) FROM idx_mest WHERE t && 'STBOX X((999,999),(1002,1002))'::stbox;
----
1
4 changes: 2 additions & 2 deletions vcpkg_ports/meos/portfile.cmake
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO MobilityDB/MobilityDB
REF dd4ccd3c2812af88716a318764c837fa5354c5f4
SHA512 4e2e7077a5bd3bce0681047448352b3c6a784aad15e89f2a65feb01b71a6329ffc926079c6ae2dccb8510b797d92b2c4c6a9e0e89ecf737997eabe1880e22a14
REF 7a29e92e7eee37e8b6069e2b686f02d0773ca8c5
SHA512 570d71dadcfebe90c89ebb074e1b7ed1a8d23eab3d4c1f1130e1d2cfef5a0d9a458b045858ac36b66b11499466cb801f8f037d3bc4c000ec63c8a1d06b604a64
)

vcpkg_replace_string(
Expand Down
Loading