From c6ecf1ea2e74c5df79793532bc8c0e2a97896900 Mon Sep 17 00:00:00 2001 From: beinan Date: Wed, 4 Mar 2026 00:26:04 +0000 Subject: [PATCH 1/2] refactor: remove Simple executor implementation Remove the legacy Simple executor to simplify the codebase and reduce maintenance burden. All queries now use the DataFusion planner, which provides better performance and more features. Changes: - Delete simple_executor module (724 lines across 5 files) - Remove ExecutionStrategy::Simple enum variant (breaking change) - Remove CypherQuery::execute_simple() method - Update all tests to use DataFusion execution - Update documentation and examples - Update Python bindings to remove Simple variant Breaking Changes: - Rust: ExecutionStrategy::Simple removed, use None or DataFusion - Python: ExecutionStrategy.Simple removed, use None or DataFusion All existing functionality is preserved - DataFusion supports all features that Simple did with better optimization. Co-Authored-By: Claude Opus 4.6 --- crates/lance-graph-python/src/graph.rs | 5 +- crates/lance-graph/README.md | 12 +- crates/lance-graph/benches/graph_execution.rs | 2 +- crates/lance-graph/src/lib.rs | 1 - crates/lance-graph/src/query.rs | 394 +----------------- .../src/simple_executor/aliases.rs | 44 -- .../src/simple_executor/clauses.rs | 93 ----- .../lance-graph/src/simple_executor/expr.rs | 263 ------------ crates/lance-graph/src/simple_executor/mod.rs | 20 - .../src/simple_executor/path_executor.rs | 304 -------------- .../tests/test_simple_executor_pipeline.rs | 73 ---- 11 files changed, 16 insertions(+), 1195 deletions(-) delete mode 100644 crates/lance-graph/src/simple_executor/aliases.rs delete mode 100644 crates/lance-graph/src/simple_executor/clauses.rs delete mode 100644 crates/lance-graph/src/simple_executor/expr.rs delete mode 100644 crates/lance-graph/src/simple_executor/mod.rs delete mode 100644 crates/lance-graph/src/simple_executor/path_executor.rs delete mode 100644 crates/lance-graph/tests/test_simple_executor_pipeline.rs diff --git a/crates/lance-graph-python/src/graph.rs b/crates/lance-graph-python/src/graph.rs index 2997c8e..223c6aa 100644 --- a/crates/lance-graph-python/src/graph.rs +++ b/crates/lance-graph-python/src/graph.rs @@ -46,8 +46,6 @@ use crate::RT; pub enum ExecutionStrategy { /// Use DataFusion query planner (default, full feature support) DataFusion, - /// Use simple single-table executor (legacy, limited features) - Simple, /// Use Lance native executor (not yet implemented) LanceNative, } @@ -56,7 +54,6 @@ impl From for RustExecutionStrategy { fn from(strategy: ExecutionStrategy) -> Self { match strategy { ExecutionStrategy::DataFusion => RustExecutionStrategy::DataFusion, - ExecutionStrategy::Simple => RustExecutionStrategy::Simple, ExecutionStrategy::LanceNative => RustExecutionStrategy::LanceNative, } } @@ -548,7 +545,7 @@ impl CypherQuery { /// /// >>> # Explicit strategy /// >>> from lance.graph import ExecutionStrategy - /// >>> result = query.execute(datasets, strategy=ExecutionStrategy.Simple) + /// >>> result = query.execute(datasets, strategy=ExecutionStrategy.DataFusion) #[pyo3(signature = (datasets, strategy=None))] fn execute( &self, diff --git a/crates/lance-graph/README.md b/crates/lance-graph/README.md index 89ab1f0..b2da6f8 100644 --- a/crates/lance-graph/README.md +++ b/crates/lance-graph/README.md @@ -7,7 +7,7 @@ A graph query engine for Lance datasets with Cypher syntax support. This crate e - Cypher query parsing and AST construction - Graph configuration for mapping Lance tables to nodes and relationships - Semantic validation with typed `GraphError` diagnostics -- Pluggable execution strategies (DataFusion planner by default, simple executor, Lance Native placeholder) +- Pluggable execution strategies (DataFusion planner by default, Lance Native placeholder) - Async query execution that returns Arrow `RecordBatch` results - JSON-serializable parameter binding for reusable query templates - Logical plan debugging via `CypherQuery::explain` @@ -50,10 +50,7 @@ let query = CypherQuery::new("MATCH (p:Person) WHERE p.age > $min RETURN p.name" let runtime = tokio::runtime::Runtime::new()?; // Use default DataFusion-based execution -let result = runtime.block_on(query.execute(tables.clone(), None))?; - -// Opt in to the simple executor if you only need projection/filter support. -let simple = runtime.block_on(query.execute(tables, Some(ExecutionStrategy::Simple)))?; +let result = runtime.block_on(query.execute(tables, None))?; ``` The query expects a `HashMap` keyed by the labels and relationship types referenced in the Cypher text. Each record batch should expose the columns configured through `GraphConfig` (ID fields, property fields, etc.). Relationship mappings also expect a batch keyed by the relationship type (for example `KNOWS`) that contains the configured source/target ID columns and any optional property columns. @@ -92,10 +89,10 @@ let config = GraphConfig::builder() - `CypherQuery::new` parses Cypher text into the internal AST. - `with_config` attaches the graph configuration used for validation and execution. - `with_parameter` / `with_parameters` bind JSON-serializable values that can be referenced as `$param` in the Cypher text. -- `execute` is asynchronous and returns an Arrow `RecordBatch`. Pass `None` for the default DataFusion planner or `Some(ExecutionStrategy::Simple)` for the single-table executor. `ExecutionStrategy::LanceNative` is reserved for future native execution support and currently errors. +- `execute` is asynchronous and returns an Arrow `RecordBatch`. Pass `None` to use the default DataFusion planner. `ExecutionStrategy::LanceNative` is reserved for future native execution support and currently errors. - `explain` is asynchronous and returns a formatted string containing the graph logical plan alongside the DataFusion logical and physical plans. -Queries with a single `MATCH` clause containing a path pattern are planned as joins using the provided mappings. Other queries can opt into the single-table projection/filter pipeline via `ExecutionStrategy::Simple` when DataFusion's planner is unnecessary. +All queries use the DataFusion planner for optimization and execution. A builder (`CypherQueryBuilder`) is also available for constructing queries programmatically without parsing text. @@ -116,7 +113,6 @@ Basic aggregations like `COUNT` are supported. Optional matches and subqueries a - `semantic` – Lightweight semantic checks on the AST. - `logical_plan` – Builders for graph logical plans. - `datafusion_planner` – DataFusion-based execution planning. -- `simple_executor` – Simple single-table executor. - `config` – Graph configuration types and builders. - `query` – High level `CypherQuery` API and runtime. - `error` – `GraphError` and result helpers. diff --git a/crates/lance-graph/benches/graph_execution.rs b/crates/lance-graph/benches/graph_execution.rs index a488947..45ed854 100644 --- a/crates/lance-graph/benches/graph_execution.rs +++ b/crates/lance-graph/benches/graph_execution.rs @@ -72,7 +72,7 @@ fn execute_cypher_query( datasets: HashMap, ) -> RecordBatch { rt.block_on(async move { - q.execute(datasets, Some(ExecutionStrategy::Simple)) + q.execute(datasets, None) .await .unwrap() }) diff --git a/crates/lance-graph/src/lib.rs b/crates/lance-graph/src/lib.rs index 52b8cc6..8f93fdd 100644 --- a/crates/lance-graph/src/lib.rs +++ b/crates/lance-graph/src/lib.rs @@ -47,7 +47,6 @@ pub mod parameter_substitution; pub mod parser; pub mod query; pub mod semantic; -pub mod simple_executor; pub mod sql_catalog; pub mod sql_query; pub mod table_readers; diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index 018b0aa..c43d1fe 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -9,9 +9,6 @@ use crate::config::GraphConfig; use crate::error::{GraphError, Result}; use crate::logical_plan::LogicalPlanner; use crate::parser::parse_cypher_query; -use crate::simple_executor::{ - to_df_boolean_expr_simple, to_df_order_by_expr_simple, to_df_value_expr_simple, PathExecutor, -}; use arrow_array::RecordBatch; use arrow_schema::{Field, Schema, SchemaRef}; use lance_graph_catalog::DirNamespace; @@ -58,8 +55,6 @@ pub enum ExecutionStrategy { /// Use DataFusion query planner (default, full feature support) #[default] DataFusion, - /// Use simple single-table executor (legacy, limited features) - Simple, /// Use Lance native executor (not yet implemented) LanceNative, } @@ -174,8 +169,6 @@ impl CypherQuery { /// .with_config(config); /// // Use the default DataFusion strategy /// let result = query.execute(datasets, None).await?; - /// // Use the Simple strategy explicitly - /// let result = query.execute(datasets, Some(ExecutionStrategy::Simple)).await?; /// ``` pub async fn execute( &self, @@ -185,7 +178,6 @@ impl CypherQuery { let strategy = strategy.unwrap_or_default(); match strategy { ExecutionStrategy::DataFusion => self.execute_datafusion(datasets).await, - ExecutionStrategy::Simple => self.execute_simple(datasets).await, ExecutionStrategy::LanceNative => Err(GraphError::UnsupportedFeature { feature: "Lance native execution strategy is not yet implemented".to_string(), location: snafu::Location::new(file!(), line!(), column!()), @@ -231,12 +223,6 @@ impl CypherQuery { self.execute_with_catalog_and_context(std::sync::Arc::new(catalog), ctx) .await } - ExecutionStrategy::Simple => Err(GraphError::UnsupportedFeature { - feature: - "Simple execution strategy is not supported for namespace-backed execution" - .to_string(), - location: snafu::Location::new(file!(), line!(), column!()), - }), ExecutionStrategy::LanceNative => Err(GraphError::UnsupportedFeature { feature: "Lance native execution strategy is not yet implemented".to_string(), location: snafu::Location::new(file!(), line!(), column!()), @@ -926,174 +912,6 @@ impl CypherQuery { Ok(output) } - /// Execute simple single-table queries (legacy implementation) - /// - /// This method supports basic projection/filter/limit workflows on a single table. - /// For full query support including joins and complex patterns, use `execute()` instead. - /// - /// Note: This implementation is retained for backward compatibility and simple use cases. - pub async fn execute_simple( - &self, - datasets: HashMap, - ) -> Result { - use crate::semantic::SemanticAnalyzer; - use arrow::compute::concat_batches; - use datafusion::datasource::MemTable; - use datafusion::prelude::*; - use std::sync::Arc; - - // Require a config for now, even if we don't fully exploit it yet - let config = self.require_config()?.clone(); - - // Ensure we don't silently ignore unsupported features (e.g. scalar functions). - let mut analyzer = SemanticAnalyzer::new(config); - let semantic = analyzer.analyze(&self.ast, &self.parameters)?; - if !semantic.errors.is_empty() { - return Err(GraphError::PlanError { - message: format!("Semantic analysis failed:\n{}", semantic.errors.join("\n")), - location: snafu::Location::new(file!(), line!(), column!()), - }); - } - - if datasets.is_empty() { - return Err(GraphError::PlanError { - message: "No input datasets provided".to_string(), - location: snafu::Location::new(file!(), line!(), column!()), - }); - } - - // Create DataFusion context and register all provided tables - // Normalize schemas and table names for case-insensitive behavior - let ctx = SessionContext::new(); - for (name, batch) in &datasets { - let normalized_batch = normalize_record_batch(batch)?; - let table = MemTable::try_new( - normalized_batch.schema(), - vec![vec![normalized_batch.clone()]], - ) - .map_err(|e| GraphError::PlanError { - message: format!("Failed to create DataFusion table: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - - // Register with lowercase table name - let normalized_name = name.to_lowercase(); - ctx.register_table(&normalized_name, Arc::new(table)) - .map_err(|e| GraphError::PlanError { - message: format!("Failed to register table '{}': {}", name, e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - - // Try to execute a path (1+ hops) if the query is a simple pattern - if let Some(df) = self.try_execute_path_generic(&ctx).await? { - let batches = df.collect().await.map_err(|e| GraphError::PlanError { - message: format!("Failed to collect results: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - if batches.is_empty() { - let schema = datasets.values().next().unwrap().schema(); - return Ok(arrow_array::RecordBatch::new_empty(schema)); - } - let merged = concat_batches(&batches[0].schema(), &batches).map_err(|e| { - GraphError::PlanError { - message: format!("Failed to concatenate result batches: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - } - })?; - return Ok(merged); - } - - // Fallback: single-table style query on the first provided table - let (table_name, batch) = datasets.iter().next().unwrap(); - let schema = batch.schema(); - - // Start a DataFrame from the registered table - let mut df = ctx - .table(table_name) - .await - .map_err(|e| GraphError::PlanError { - message: format!("Failed to create DataFrame for '{}': {}", table_name, e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - - // Apply WHERE if present (limited support: simple comparisons on a single column) - if let Some(where_clause) = &self.ast.where_clause { - if let Some(filter_expr) = to_df_boolean_expr_simple(&where_clause.expression) { - df = df.filter(filter_expr).map_err(|e| GraphError::PlanError { - message: format!("Failed to apply filter: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - } - - // Build projection from RETURN clause - let proj_exprs: Vec = self - .ast - .return_clause - .items - .iter() - .map(|item| { - let expr = to_df_value_expr_simple(&item.expression); - if let Some(alias) = &item.alias { - expr.alias(alias) - } else { - expr - } - }) - .collect(); - if !proj_exprs.is_empty() { - df = df.select(proj_exprs).map_err(|e| GraphError::PlanError { - message: format!("Failed to project: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - - // Apply DISTINCT - if self.ast.return_clause.distinct { - df = df.distinct().map_err(|e| GraphError::PlanError { - message: format!("Failed to apply DISTINCT: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - - // Apply ORDER BY if present - if let Some(order_by) = &self.ast.order_by { - let sort_expr = to_df_order_by_expr_simple(&order_by.items); - df = df.sort(sort_expr).map_err(|e| GraphError::PlanError { - message: format!("Failed to apply ORDER BY: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - - // Apply SKIP/OFFSET and LIMIT if present - if self.ast.skip.is_some() || self.ast.limit.is_some() { - let offset = self.ast.skip.unwrap_or(0) as usize; - let fetch = self.ast.limit.map(|l| l as usize); - df = df.limit(offset, fetch).map_err(|e| GraphError::PlanError { - message: format!("Failed to apply SKIP/LIMIT: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - - // Collect results and concat into a single RecordBatch - let batches = df.collect().await.map_err(|e| GraphError::PlanError { - message: format!("Failed to collect results: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - - if batches.is_empty() { - // Return an empty batch with the source schema - return Ok(arrow_array::RecordBatch::new_empty(schema)); - } - - let merged = - concat_batches(&batches[0].schema(), &batches).map_err(|e| GraphError::PlanError { - message: format!("Failed to concatenate result batches: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - Ok(merged) - } /// Get all node labels referenced in this query pub fn referenced_node_labels(&self) -> Vec { @@ -1254,97 +1072,6 @@ impl CypherQuery { } impl CypherQuery { - // Generic path executor (N-hop) entrypoint. - async fn try_execute_path_generic( - &self, - ctx: &datafusion::prelude::SessionContext, - ) -> Result> { - use crate::ast::GraphPattern; - // Only support single MATCH clause for path execution - - let match_clause = match self.ast.reading_clauses.as_slice() { - [ReadingClause::Match(mc)] => mc, - _ => return Ok(None), - }; - let path = match match_clause.patterns.as_slice() { - [GraphPattern::Path(p)] if !p.segments.is_empty() => p, - _ => return Ok(None), - }; - let cfg = self.require_config()?; - - // Handle single-segment variable-length paths by unrolling ranges (*1..N, capped) - if path.segments.len() == 1 { - if let Some(length_range) = &path.segments[0].relationship.length { - let cap: u32 = crate::MAX_VARIABLE_LENGTH_HOPS; - let min_len = length_range.min.unwrap_or(1).max(1); - let max_len = length_range.max.unwrap_or(cap); - - if min_len > max_len { - return Err(GraphError::InvalidPattern { - message: format!( - "Invalid variable-length range: min {:?} greater than max {:?}", - length_range.min, length_range.max - ), - location: snafu::Location::new(file!(), line!(), column!()), - }); - } - - if max_len > cap { - return Err(GraphError::UnsupportedFeature { - feature: format!( - "Variable-length paths with length > {} are not supported (got {:?}..{:?})", - cap, length_range.min, length_range.max - ), - location: snafu::Location::new(file!(), line!(), column!()), - }); - } - - use datafusion::dataframe::DataFrame; - let mut union_df: Option = None; - - for hops in min_len..=max_len { - // Build a fixed-length synthetic path by repeating the single segment - let mut synthetic = crate::ast::PathPattern { - start_node: path.start_node.clone(), - segments: Vec::with_capacity(hops as usize), - }; - - for i in 0..hops { - let mut seg = path.segments[0].clone(); - // Drop variables to avoid alias collisions on repeated hops - seg.relationship.variable = None; - if (i + 1) < hops { - seg.end_node.variable = None; // intermediate hop - } - // Clear length spec for this fixed hop - seg.relationship.length = None; - synthetic.segments.push(seg); - } - - let exec = PathExecutor::new(ctx, cfg, &synthetic)?; - let mut df = exec.build_chain().await?; - df = exec.apply_where(df, &self.ast)?; - df = exec.apply_return(df, &self.ast)?; - - union_df = Some(match union_df { - Some(acc) => acc.union(df).map_err(|e| GraphError::PlanError { - message: format!("Failed to UNION variable-length paths: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?, - None => df, - }); - } - - return Ok(union_df); - } - } - - let exec = PathExecutor::new(ctx, cfg, path)?; - let df = exec.build_chain().await?; - let df = exec.apply_where(df, &self.ast)?; - let df = exec.apply_return(df, &self.ast)?; - Ok(Some(df)) - } } /// Builder for constructing Cypher queries programmatically @@ -1564,9 +1291,9 @@ mod tests { .with_config(cfg); let mut data = HashMap::new(); - data.insert("people".to_string(), batch); + data.insert("Person".to_string(), batch); - let out = q.execute_simple(data).await.unwrap(); + let out = q.execute(data, None).await.unwrap(); assert_eq!(out.num_rows(), 2); let names = out .column(0) @@ -1635,7 +1362,7 @@ mod tests { data.insert("Person".to_string(), people); data.insert("KNOWS".to_string(), knows); - let out = q.execute_simple(data).await.unwrap(); + let out = q.execute(data, None).await.unwrap(); // Expect two rows: Bob, Carol (the targets) let names = out .column(0) @@ -1681,9 +1408,9 @@ mod tests { .with_config(cfg); let mut data = HashMap::new(); - data.insert("people".to_string(), batch); + data.insert("Person".to_string(), batch); - let out = q.execute_simple(data).await.unwrap(); + let out = q.execute(data, None).await.unwrap(); let ages = out.column(1).as_any().downcast_ref::().unwrap(); let collected: Vec = (0..out.num_rows()).map(|i| ages.value(i)).collect(); assert_eq!(collected, vec![28, 29, 34, 42]); @@ -1720,9 +1447,9 @@ mod tests { .with_config(cfg); let mut data = HashMap::new(); - data.insert("people".to_string(), batch); + data.insert("Person".to_string(), batch); - let out = q.execute_simple(data).await.unwrap(); + let out = q.execute(data, None).await.unwrap(); assert_eq!(out.num_rows(), 2); let ages = out.column(0).as_any().downcast_ref::().unwrap(); let collected: Vec = (0..out.num_rows()).map(|i| ages.value(i)).collect(); @@ -1752,107 +1479,15 @@ mod tests { .with_config(cfg); let mut data = HashMap::new(); - data.insert("people".to_string(), batch); + data.insert("Person".to_string(), batch); - let out = q.execute_simple(data).await.unwrap(); + let out = q.execute(data, None).await.unwrap(); assert_eq!(out.num_rows(), 2); let ages = out.column(0).as_any().downcast_ref::().unwrap(); let collected: Vec = (0..out.num_rows()).map(|i| ages.value(i)).collect(); assert_eq!(collected, vec![30, 40]); } - #[tokio::test] - async fn test_execute_datafusion_pipeline() { - use arrow_array::{Int64Array, RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema}; - use std::sync::Arc; - - // Create test data - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("name", DataType::Utf8, false), - Field::new("age", DataType::Int64, false), - ])); - - let batch = RecordBatch::try_new( - schema, - vec![ - Arc::new(Int64Array::from(vec![1, 2, 3])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), - Arc::new(Int64Array::from(vec![25, 35, 30])), - ], - ) - .unwrap(); - - let cfg = GraphConfig::builder() - .with_node_label("Person", "id") - .build() - .unwrap(); - - // Test simple node query with DataFusion pipeline - let query = CypherQuery::new("MATCH (p:Person) WHERE p.age > 30 RETURN p.name") - .unwrap() - .with_config(cfg); - - let mut datasets = HashMap::new(); - datasets.insert("Person".to_string(), batch); - - // Execute using the new DataFusion pipeline - let result = query.execute_datafusion(datasets.clone()).await; - - match &result { - Ok(batch) => { - println!( - "DataFusion result: {} rows, {} columns", - batch.num_rows(), - batch.num_columns() - ); - if batch.num_rows() > 0 { - println!("First row data: {:?}", batch.slice(0, 1)); - } - } - Err(e) => { - println!("DataFusion execution failed: {:?}", e); - } - } - - // For comparison, try legacy execution - let legacy_result = query.execute_simple(datasets).await.unwrap(); - println!( - "Legacy result: {} rows, {} columns", - legacy_result.num_rows(), - legacy_result.num_columns() - ); - - let result = result.unwrap(); - - // Verify correct filtering: should return 1 row (Bob with age > 30) - assert_eq!( - result.num_rows(), - 1, - "Expected 1 row after filtering WHERE p.age > 30" - ); - - // Verify correct projection: should return 1 column (name) - assert_eq!( - result.num_columns(), - 1, - "Expected 1 column after projection RETURN p.name" - ); - - // Verify correct data: should contain "Bob" - let names = result - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!( - names.value(0), - "Bob", - "Expected filtered result to contain Bob" - ); - } - #[tokio::test] async fn test_execute_datafusion_simple_scan() { use arrow_array::{Int64Array, RecordBatch, StringArray}; @@ -2325,15 +1960,6 @@ mod tests { .collect(); values.sort(); assert_eq!(values, vec!["Bob".to_string(), "David".to_string()]); - - let err = query - .execute_with_namespace(namespace, Some(ExecutionStrategy::Simple)) - .await - .expect_err("simple strategy not supported"); - assert!( - matches!(err, GraphError::UnsupportedFeature { .. }), - "expected unsupported feature error, got {err:?}" - ); } #[tokio::test] @@ -2358,7 +1984,7 @@ mod tests { .unwrap() .with_config(cfg); - let result = query.execute_simple(datasets).await; + let result = query.execute(datasets, None).await; assert!(result.is_err()); match result { diff --git a/crates/lance-graph/src/simple_executor/aliases.rs b/crates/lance-graph/src/simple_executor/aliases.rs deleted file mode 100644 index 45b6b95..0000000 --- a/crates/lance-graph/src/simple_executor/aliases.rs +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -use crate::case_insensitive::qualify_column; - -/// Qualify a column name for internal DataFusion operations. -/// Returns format: `alias__property` (e.g., "p__name"). -/// Both alias and property are normalized to lowercase for case-insensitive behavior. -/// Note: This is for internal use only. Final output uses Cypher dot notation. -#[inline] -pub(super) fn qualify_alias_property(alias: &str, property: &str) -> String { - qualify_column(alias, property) -} - -/// Convert to Cypher-style column name for query results. -/// Returns format: `alias.property` (e.g., "p.name"). -/// This matches the output format used by the DataFusion executor. -pub(super) fn to_cypher_column_name(alias: &str, property: &str) -> String { - format!("{}.{}", alias, property) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_qualify_alias_property() { - // Should normalize to lowercase - assert_eq!(qualify_alias_property("p", "name"), "p__name"); - assert_eq!(qualify_alias_property("person", "age"), "person__age"); - assert_eq!(qualify_alias_property("P", "Name"), "p__name"); - assert_eq!(qualify_alias_property("PERSON", "AGE"), "person__age"); - assert_eq!( - qualify_alias_property("Person", "fullName"), - "person__fullname" - ); - } - - #[test] - fn test_to_cypher_column_name() { - assert_eq!(to_cypher_column_name("p", "name"), "p.name"); - assert_eq!(to_cypher_column_name("c", "company_name"), "c.company_name"); - } -} diff --git a/crates/lance-graph/src/simple_executor/clauses.rs b/crates/lance-graph/src/simple_executor/clauses.rs deleted file mode 100644 index 66c6325..0000000 --- a/crates/lance-graph/src/simple_executor/clauses.rs +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -use crate::error::Result; - -pub(super) fn apply_where_with_qualifier( - mut df: datafusion::dataframe::DataFrame, - ast: &crate::ast::CypherQuery, - qualify: &dyn Fn(&str, &str) -> String, -) -> Result { - use super::expr::to_df_boolean_expr_with_vars; - use crate::error::GraphError; - if let Some(where_clause) = &ast.where_clause { - if let Some(expr) = - to_df_boolean_expr_with_vars(&where_clause.expression, &|v, p| qualify(v, p)) - { - df = df.filter(expr).map_err(|e| GraphError::PlanError { - message: format!("Failed to apply WHERE: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - } - Ok(df) -} - -pub(super) fn apply_return_with_qualifier( - mut df: datafusion::dataframe::DataFrame, - ast: &crate::ast::CypherQuery, - qualify: &dyn Fn(&str, &str) -> String, -) -> Result { - use crate::error::GraphError; - use datafusion::logical_expr::Expr; - let mut proj: Vec = Vec::new(); - for item in &ast.return_clause.items { - if let crate::ast::ValueExpression::Property(prop) = &item.expression { - let col_name = qualify(&prop.variable, &prop.property); - let mut e = datafusion::logical_expr::col(col_name); - if let Some(a) = &item.alias { - e = e.alias(a); - } else { - let cypher_name = - super::aliases::to_cypher_column_name(&prop.variable, &prop.property); - e = e.alias(cypher_name); - } - proj.push(e); - } - } - if !proj.is_empty() { - df = df.select(proj).map_err(|e| GraphError::PlanError { - message: format!("Failed to project: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - if ast.return_clause.distinct { - df = df.distinct().map_err(|e| GraphError::PlanError { - message: format!("Failed to apply DISTINCT: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - // ORDER BY - if let Some(order_by) = &ast.order_by { - use datafusion::logical_expr::SortExpr; - let mut sorts: Vec = Vec::new(); - for item in &order_by.items { - if let crate::ast::ValueExpression::Property(prop) = &item.expression { - let col_name = qualify(&prop.variable, &prop.property); - let col = datafusion::logical_expr::col(col_name); - let asc = matches!(item.direction, crate::ast::SortDirection::Ascending); - sorts.push(SortExpr { - expr: col, - asc, - nulls_first: false, - }); - } - } - if !sorts.is_empty() { - df = df.sort(sorts).map_err(|e| GraphError::PlanError { - message: format!("Failed to apply ORDER BY: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - } - // SKIP/OFFSET and LIMIT - if ast.skip.is_some() || ast.limit.is_some() { - let offset = ast.skip.unwrap_or(0) as usize; - let fetch = ast.limit.map(|l| l as usize); - df = df.limit(offset, fetch).map_err(|e| GraphError::PlanError { - message: format!("Failed to apply SKIP/LIMIT: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - Ok(df) -} diff --git a/crates/lance-graph/src/simple_executor/expr.rs b/crates/lance-graph/src/simple_executor/expr.rs deleted file mode 100644 index 21772db..0000000 --- a/crates/lance-graph/src/simple_executor/expr.rs +++ /dev/null @@ -1,263 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -//! Expression translation helpers for the simple executor - -pub(super) fn to_df_boolean_expr_with_vars( - expr: &crate::ast::BooleanExpression, - qualify: &F, -) -> Option -where - F: Fn(&str, &str) -> String, -{ - use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO, ValueExpression as VE}; - use datafusion::logical_expr::{col, Expr, Operator}; - match expr { - BE::Comparison { - left, - operator, - right, - } => { - let (var, prop, lit_expr) = match (left, right) { - (VE::Property(p), VE::Literal(val)) => { - (p.variable.as_str(), p.property.as_str(), to_df_literal(val)) - } - (VE::Literal(val), VE::Property(p)) => { - (p.variable.as_str(), p.property.as_str(), to_df_literal(val)) - } - _ => return None, - }; - let qualified = qualify(var, prop); - let op = match operator { - CO::Equal => Operator::Eq, - CO::NotEqual => Operator::NotEq, - CO::LessThan => Operator::Lt, - CO::LessThanOrEqual => Operator::LtEq, - CO::GreaterThan => Operator::Gt, - CO::GreaterThanOrEqual => Operator::GtEq, - }; - Some(Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr { - left: Box::new(col(&qualified)), - op, - right: Box::new(lit_expr), - })) - } - BE::And(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( - datafusion::logical_expr::BinaryExpr { - left: Box::new(to_df_boolean_expr_with_vars(l, qualify)?), - op: Operator::And, - right: Box::new(to_df_boolean_expr_with_vars(r, qualify)?), - }, - )), - BE::Or(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( - datafusion::logical_expr::BinaryExpr { - left: Box::new(to_df_boolean_expr_with_vars(l, qualify)?), - op: Operator::Or, - right: Box::new(to_df_boolean_expr_with_vars(r, qualify)?), - }, - )), - BE::Not(inner) => Some(datafusion::logical_expr::Expr::Not(Box::new( - to_df_boolean_expr_with_vars(inner, qualify)?, - ))), - _ => None, - } -} - -pub(super) fn to_df_literal(val: &crate::ast::PropertyValue) -> datafusion::logical_expr::Expr { - use datafusion::logical_expr::lit; - match val { - crate::ast::PropertyValue::String(s) => lit(s.clone()), - crate::ast::PropertyValue::Integer(i) => lit(*i), - crate::ast::PropertyValue::Float(f) => lit(*f), - crate::ast::PropertyValue::Boolean(b) => lit(*b), - crate::ast::PropertyValue::Null => { - datafusion::logical_expr::Expr::Literal(datafusion::scalar::ScalarValue::Null, None) - } - crate::ast::PropertyValue::Parameter(_) => lit(0), - crate::ast::PropertyValue::Property(prop) => datafusion::logical_expr::col(&prop.property), - } -} - -/// Minimal translator for simple boolean expressions into DataFusion Expr -pub(crate) fn to_df_boolean_expr_simple( - expr: &crate::ast::BooleanExpression, -) -> Option { - use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO, ValueExpression as VE}; - use datafusion::logical_expr::{col, Expr, Operator}; - match expr { - BE::Comparison { - left, - operator, - right, - } => { - let (col_name, lit_expr) = match (left, right) { - (VE::Property(prop), VE::Literal(val)) => { - (prop.property.clone(), to_df_literal(val)) - } - (VE::Literal(val), VE::Property(prop)) => { - (prop.property.clone(), to_df_literal(val)) - } - _ => return None, - }; - let op = match operator { - CO::Equal => Operator::Eq, - CO::NotEqual => Operator::NotEq, - CO::LessThan => Operator::Lt, - CO::LessThanOrEqual => Operator::LtEq, - CO::GreaterThan => Operator::Gt, - CO::GreaterThanOrEqual => Operator::GtEq, - }; - Some(Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr { - left: Box::new(col(col_name)), - op, - right: Box::new(lit_expr), - })) - } - BE::And(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( - datafusion::logical_expr::BinaryExpr { - left: Box::new(to_df_boolean_expr_simple(l)?), - op: Operator::And, - right: Box::new(to_df_boolean_expr_simple(r)?), - }, - )), - BE::Or(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( - datafusion::logical_expr::BinaryExpr { - left: Box::new(to_df_boolean_expr_simple(l)?), - op: Operator::Or, - right: Box::new(to_df_boolean_expr_simple(r)?), - }, - )), - BE::Not(inner) => Some(datafusion::logical_expr::Expr::Not(Box::new( - to_df_boolean_expr_simple(inner)?, - ))), - BE::Exists(prop) => Some(datafusion::logical_expr::Expr::IsNotNull(Box::new( - datafusion::logical_expr::Expr::Column(datafusion::common::Column::from_name( - prop.property.clone(), - )), - ))), - _ => None, - } -} - -/// Build ORDER BY expressions for simple queries -pub(crate) fn to_df_order_by_expr_simple( - items: &[crate::ast::OrderByItem], -) -> Vec { - use datafusion::logical_expr::SortExpr; - items - .iter() - .map(|item| { - let expr = to_df_value_expr_simple(&item.expression); - let asc = matches!(item.direction, crate::ast::SortDirection::Ascending); - SortExpr { - expr, - asc, - nulls_first: false, - } - }) - .collect() -} - -/// Build value expressions for simple queries -pub(crate) fn to_df_value_expr_simple( - expr: &crate::ast::ValueExpression, -) -> datafusion::logical_expr::Expr { - use crate::ast::ValueExpression as VE; - use datafusion::functions::string::{lower, upper}; - use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator}; - match expr { - VE::Property(prop) => col(&prop.property), - VE::Variable(v) => col(v), - VE::Literal(v) => to_df_literal(v), - VE::ScalarFunction { name, args } => match name.to_lowercase().as_str() { - "tolower" | "lower" => { - if args.len() == 1 { - lower().call(vec![to_df_value_expr_simple(&args[0])]) - } else { - Expr::Literal(datafusion::scalar::ScalarValue::Null, None) - } - } - "toupper" | "upper" => { - if args.len() == 1 { - upper().call(vec![to_df_value_expr_simple(&args[0])]) - } else { - Expr::Literal(datafusion::scalar::ScalarValue::Null, None) - } - } - _ => Expr::Literal(datafusion::scalar::ScalarValue::Null, None), - }, - VE::AggregateFunction { .. } => { - // Aggregates not supported in simple executor - Expr::Literal(datafusion::scalar::ScalarValue::Null, None) - } - VE::Arithmetic { - left, - operator, - right, - } => { - use crate::ast::ArithmeticOperator as AO; - let l = to_df_value_expr_simple(left); - let r = to_df_value_expr_simple(right); - let op = match operator { - AO::Add => Operator::Plus, - AO::Subtract => Operator::Minus, - AO::Multiply => Operator::Multiply, - AO::Divide => Operator::Divide, - AO::Modulo => Operator::Modulo, - }; - Expr::BinaryExpr(BinaryExpr { - left: Box::new(l), - op, - right: Box::new(r), - }) - } - VE::VectorDistance { .. } => lit(0.0f32), - VE::VectorSimilarity { .. } => lit(1.0f32), - VE::Parameter(_) => lit(0), - VE::VectorLiteral(_) => lit(0.0f32), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::ast::{ArithmeticOperator, PropertyRef, ValueExpression}; - use datafusion::logical_expr::Expr; - use datafusion::scalar::ScalarValue; - - #[test] - fn test_simple_expr_unknown_function_returns_null() { - let expr = ValueExpression::ScalarFunction { - name: "replace".to_string(), - args: vec![ValueExpression::Property(PropertyRef::new("p", "name"))], - }; - let df_expr = to_df_value_expr_simple(&expr); - assert!(matches!(df_expr, Expr::Literal(ScalarValue::Null, _))); - } - - #[test] - fn test_simple_expr_lower_wrong_arity_returns_null() { - let expr = ValueExpression::ScalarFunction { - name: "lower".to_string(), - args: vec![ - ValueExpression::Property(PropertyRef::new("p", "name")), - ValueExpression::Property(PropertyRef::new("p", "name")), - ], - }; - let df_expr = to_df_value_expr_simple(&expr); - assert!(matches!(df_expr, Expr::Literal(ScalarValue::Null, _))); - } - - #[test] - fn test_simple_expr_arithmetic_builds_binary_expr() { - let expr = ValueExpression::Arithmetic { - left: Box::new(ValueExpression::Variable("x".to_string())), - operator: ArithmeticOperator::Add, - right: Box::new(ValueExpression::Literal( - crate::ast::PropertyValue::Integer(1), - )), - }; - let df_expr = to_df_value_expr_simple(&expr); - assert!(matches!(df_expr, Expr::BinaryExpr(_))); - } -} diff --git a/crates/lance-graph/src/simple_executor/mod.rs b/crates/lance-graph/src/simple_executor/mod.rs deleted file mode 100644 index 1ea2484..0000000 --- a/crates/lance-graph/src/simple_executor/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -//! Simple single-table query executor with limited Cypher feature support -//! -//! This module provides a lightweight execution strategy for basic Cypher queries -//! that don't require the full DataFusion planner. It supports: -//! - Single-table scans with property filters -//! - Multi-hop path patterns via join chains -//! - Basic projections, DISTINCT, ORDER BY, SKIP, and LIMIT - -mod aliases; -mod clauses; -mod expr; -mod path_executor; - -pub(crate) use expr::{ - to_df_boolean_expr_simple, to_df_order_by_expr_simple, to_df_value_expr_simple, -}; -pub(crate) use path_executor::PathExecutor; diff --git a/crates/lance-graph/src/simple_executor/path_executor.rs b/crates/lance-graph/src/simple_executor/path_executor.rs deleted file mode 100644 index eaea5e5..0000000 --- a/crates/lance-graph/src/simple_executor/path_executor.rs +++ /dev/null @@ -1,304 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -use crate::case_insensitive::qualify_column; -use crate::error::{GraphError, Result}; -use datafusion::logical_expr::JoinType; - -// Internal helper that plans and executes a single path by chaining joins. -pub(crate) struct PathExecutor<'a> { - pub(super) ctx: &'a datafusion::prelude::SessionContext, - pub(super) path: &'a crate::ast::PathPattern, - pub(super) start_label: &'a str, - pub(super) start_alias: String, - segs: Vec>, - node_maps: std::collections::HashMap, - rel_maps: std::collections::HashMap, -} - -#[derive(Clone)] -struct SegMeta<'a> { - rel_type: &'a str, - end_label: &'a str, - dir: crate::ast::RelationshipDirection, - rel_alias: String, - end_alias: String, -} - -impl<'a> PathExecutor<'a> { - pub(crate) fn new( - ctx: &'a datafusion::prelude::SessionContext, - cfg: &'a crate::config::GraphConfig, - path: &'a crate::ast::PathPattern, - ) -> Result { - use std::collections::{HashMap, HashSet}; - let mut used: HashSet = HashSet::new(); - let mut uniq = |desired: &str| -> String { - if used.insert(desired.to_string()) { - return desired.to_string(); - } - let mut i = 2usize; - loop { - let cand = format!("{}_{}", desired, i); - if used.insert(cand.clone()) { - break cand; - } - i += 1; - } - }; - - let start_label = path - .start_node - .labels - .first() - .map(|s| s.as_str()) - .ok_or_else(|| GraphError::PlanError { - message: "Start node must have a label".to_string(), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - let start_alias = uniq( - &path - .start_node - .variable - .as_deref() - .unwrap_or(start_label) - .to_lowercase(), - ); - - let mut segs: Vec = Vec::with_capacity(path.segments.len()); - for seg in &path.segments { - let rel_type = seg - .relationship - .types - .first() - .map(|s| s.as_str()) - .ok_or_else(|| GraphError::PlanError { - message: "Relationship must have a type".to_string(), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - let end_label = seg - .end_node - .labels - .first() - .map(|s| s.as_str()) - .ok_or_else(|| GraphError::PlanError { - message: "End node must have a label".to_string(), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - let rel_alias = uniq( - &seg.relationship - .variable - .as_deref() - .unwrap_or(rel_type) - .to_lowercase(), - ); - let end_alias = uniq( - &seg.end_node - .variable - .as_deref() - .unwrap_or(end_label) - .to_lowercase(), - ); - segs.push(SegMeta { - rel_type, - end_label, - dir: seg.relationship.direction.clone(), - rel_alias, - end_alias, - }); - } - - let mut node_maps: HashMap = HashMap::new(); - let mut rel_maps: HashMap = HashMap::new(); - node_maps.insert( - start_alias.to_lowercase(), - cfg.get_node_mapping(start_label) - .ok_or_else(|| GraphError::PlanError { - message: format!("No node mapping for '{}'", start_label), - location: snafu::Location::new(file!(), line!(), column!()), - })?, - ); - for seg in &segs { - node_maps.insert( - seg.end_alias.to_lowercase(), - cfg.get_node_mapping(seg.end_label) - .ok_or_else(|| GraphError::PlanError { - message: format!("No node mapping for '{}'", seg.end_label), - location: snafu::Location::new(file!(), line!(), column!()), - })?, - ); - rel_maps.insert( - seg.rel_alias.to_lowercase(), - cfg.get_relationship_mapping(seg.rel_type).ok_or_else(|| { - GraphError::PlanError { - message: format!("No relationship mapping for '{}'", seg.rel_type), - location: snafu::Location::new(file!(), line!(), column!()), - } - })?, - ); - } - - Ok(Self { - ctx, - path, - start_label, - start_alias, - segs, - node_maps, - rel_maps, - }) - } - - async fn open_aliased( - &self, - table: &str, - alias: &str, - ) -> Result { - let df = self - .ctx - .table(table) - .await - .map_err(|e| GraphError::PlanError { - message: format!("Failed to read table '{}': {}", table, e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - let schema = df.schema(); - let proj: Vec = schema - .fields() - .iter() - .map(|f| datafusion::logical_expr::col(f.name()).alias(qualify_column(alias, f.name()))) - .collect(); - df.alias(alias)? - .select(proj) - .map_err(|e| GraphError::PlanError { - message: format!("Failed to alias/select '{}': {}", table, e), - location: snafu::Location::new(file!(), line!(), column!()), - }) - } - - pub(crate) async fn build_chain(&self) -> Result { - // Start node - let mut df = self - .open_aliased(self.start_label, &self.start_alias) - .await?; - // Inline property filters on start node - for (k, v) in &self.path.start_node.properties { - let expr = super::expr::to_df_literal(v); - df = df - .filter(datafusion::logical_expr::Expr::BinaryExpr( - datafusion::logical_expr::BinaryExpr { - left: Box::new(datafusion::logical_expr::col(format!( - "{}__{}", - self.start_alias, k - ))), - op: datafusion::logical_expr::Operator::Eq, - right: Box::new(expr), - }, - )) - .map_err(|e| GraphError::PlanError { - message: format!("Failed to apply filter: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - } - - // Chain joins for each hop - let mut current_node_alias = self.start_alias.as_str(); - for s in &self.segs { - let rel_df = self.open_aliased(s.rel_type, &s.rel_alias).await?; - let node_map = self - .node_maps - .get(¤t_node_alias.to_lowercase()) - .unwrap(); - let rel_map = self.rel_maps.get(&s.rel_alias.to_lowercase()).unwrap(); - let (left_key, right_key) = match s.dir { - crate::ast::RelationshipDirection::Outgoing - | crate::ast::RelationshipDirection::Undirected => ( - qualify_column(current_node_alias, &node_map.id_field), - qualify_column(&s.rel_alias, &rel_map.source_id_field), - ), - crate::ast::RelationshipDirection::Incoming => ( - qualify_column(current_node_alias, &node_map.id_field), - qualify_column(&s.rel_alias, &rel_map.target_id_field), - ), - }; - df = df - .join( - rel_df, - JoinType::Inner, - &[left_key.as_str()], - &[right_key.as_str()], - None, - ) - .map_err(|e| GraphError::PlanError { - message: format!("Join failed (node->rel): {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - - let end_df = self.open_aliased(s.end_label, &s.end_alias).await?; - let end_node_map = self.node_maps.get(&s.end_alias.to_lowercase()).unwrap(); - let (left_key2, right_key2) = match s.dir { - crate::ast::RelationshipDirection::Outgoing - | crate::ast::RelationshipDirection::Undirected => ( - qualify_column(&s.rel_alias, &rel_map.target_id_field), - qualify_column(&s.end_alias, &end_node_map.id_field), - ), - crate::ast::RelationshipDirection::Incoming => ( - qualify_column(&s.rel_alias, &rel_map.source_id_field), - qualify_column(&s.end_alias, &end_node_map.id_field), - ), - }; - df = df - .join( - end_df, - JoinType::Inner, - &[left_key2.as_str()], - &[right_key2.as_str()], - None, - ) - .map_err(|e| GraphError::PlanError { - message: format!("Join failed (rel->node): {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - current_node_alias = &s.end_alias; - } - - Ok(df) - } - - fn resolve_var_alias<'b>(&'b self, var: &str) -> Option<&'b str> { - if Some(var) == self.path.start_node.variable.as_deref() { - return Some(self.start_alias.as_str()); - } - for (i, seg) in self.path.segments.iter().enumerate() { - if Some(var) == seg.relationship.variable.as_deref() { - return Some(self.segs[i].rel_alias.as_str()); - } - if Some(var) == seg.end_node.variable.as_deref() { - return Some(self.segs[i].end_alias.as_str()); - } - } - None - } - - pub(crate) fn apply_where( - &self, - df: datafusion::dataframe::DataFrame, - ast: &crate::ast::CypherQuery, - ) -> Result { - super::clauses::apply_where_with_qualifier(df, ast, &|var, prop| { - let alias = self.resolve_var_alias(var).unwrap_or(var); - super::aliases::qualify_alias_property(alias, prop) - }) - } - - pub(crate) fn apply_return( - &self, - df: datafusion::dataframe::DataFrame, - ast: &crate::ast::CypherQuery, - ) -> Result { - super::clauses::apply_return_with_qualifier(df, ast, &|var, prop| { - let alias = self.resolve_var_alias(var).unwrap_or(var); - super::aliases::qualify_alias_property(alias, prop) - }) - } -} diff --git a/crates/lance-graph/tests/test_simple_executor_pipeline.rs b/crates/lance-graph/tests/test_simple_executor_pipeline.rs deleted file mode 100644 index 097b116..0000000 --- a/crates/lance-graph/tests/test_simple_executor_pipeline.rs +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -use std::{collections::HashMap, sync::Arc}; - -use arrow_array::{Int64Array, RecordBatch, StringArray}; -use arrow_schema::{DataType, Field, Schema}; -use lance_graph::{CypherQuery, ExecutionStrategy, GraphConfig}; - -fn create_person_batch() -> RecordBatch { - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("name", DataType::Utf8, false), - ])); - - let ids = Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5])); - let names = Arc::new(StringArray::from(vec![ - "Alice", "Bob", "Charlie", "David", "Eve", - ])); - - RecordBatch::try_new(schema, vec![ids, names]).unwrap() -} - -#[tokio::test] -async fn test_tolower_works_in_simple_executor() { - let person_batch = create_person_batch(); - let config = GraphConfig::builder() - .with_node_label("Person", "id") - .build() - .unwrap(); - - let query = CypherQuery::new( - "MATCH (p:Person) RETURN p.name AS name, tolower(p.name) AS lowered ORDER BY name", - ) - .unwrap() - .with_config(config); - - let mut datasets = HashMap::new(); - datasets.insert("Person".to_string(), person_batch); - - let result = query - .execute(datasets, Some(ExecutionStrategy::Simple)) - .await - .unwrap(); - - let names = result - .column_by_name("name") - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - let lowered = result - .column_by_name("lowered") - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - - let got: Vec<(String, String)> = (0..result.num_rows()) - .map(|i| (names.value(i).to_string(), lowered.value(i).to_string())) - .collect(); - - assert_eq!( - got, - vec![ - ("Alice".to_string(), "alice".to_string()), - ("Bob".to_string(), "bob".to_string()), - ("Charlie".to_string(), "charlie".to_string()), - ("David".to_string(), "david".to_string()), - ("Eve".to_string(), "eve".to_string()), - ] - ); -} From 7ecfea629db5bc5a4e8507d513957bc6d61f6356 Mon Sep 17 00:00:00 2001 From: beinan Date: Wed, 4 Mar 2026 00:29:25 +0000 Subject: [PATCH 2/2] style: fix formatting and remove unused imports - Run cargo fmt to fix formatting issues - Remove empty impl CypherQuery {} block - Remove unused ExecutionStrategy import from benchmarks Co-Authored-By: Claude Opus 4.6 --- crates/lance-graph/benches/graph_execution.rs | 8 ++------ crates/lance-graph/src/query.rs | 4 ---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/crates/lance-graph/benches/graph_execution.rs b/crates/lance-graph/benches/graph_execution.rs index 45ed854..fab1a94 100644 --- a/crates/lance-graph/benches/graph_execution.rs +++ b/crates/lance-graph/benches/graph_execution.rs @@ -22,7 +22,7 @@ use arrow_schema::{DataType, Field, Schema as ArrowSchema}; use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use futures::TryStreamExt; use lance::dataset::{Dataset, WriteMode, WriteParams}; -use lance_graph::{CypherQuery, ExecutionStrategy, GraphConfig}; +use lance_graph::{CypherQuery, GraphConfig}; use tempfile::TempDir; fn create_people_batch() -> RecordBatch { @@ -71,11 +71,7 @@ fn execute_cypher_query( q: &CypherQuery, datasets: HashMap, ) -> RecordBatch { - rt.block_on(async move { - q.execute(datasets, None) - .await - .unwrap() - }) + rt.block_on(async move { q.execute(datasets, None).await.unwrap() }) } fn make_people_batch(n: usize) -> RecordBatch { diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index c43d1fe..9625f27 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -912,7 +912,6 @@ impl CypherQuery { Ok(output) } - /// Get all node labels referenced in this query pub fn referenced_node_labels(&self) -> Vec { let mut labels = Vec::new(); @@ -1071,9 +1070,6 @@ impl CypherQuery { } } -impl CypherQuery { -} - /// Builder for constructing Cypher queries programmatically #[derive(Debug, Default)] pub struct CypherQueryBuilder {