From e5969a41076b8ad890e524a344a87d4ee3482b62 Mon Sep 17 00:00:00 2001 From: Youssef Date: Wed, 7 Jan 2026 18:50:23 +0100 Subject: [PATCH 1/4] feat: add toLower/toUpper support, fix type coercion error - Add support for toLower/toUpper (Cypher) and lower/upper (SQL) string functions - Change fallback for unsupported functions from lit(0) to ScalarValue::Null to prevent type coercion errors in any context (string or numeric) - Add unit tests for new string functions - Add integration tests including exact bug reproduction Fixes type coercion error when using toLower(s.name) CONTAINS 'x' with integer columns in RETURN clause. --- .../src/datafusion_planner/expression.rs | 150 +++++++++++++++++- .../tests/test_datafusion_pipeline.rs | 134 ++++++++++++++++ 2 files changed, 282 insertions(+), 2 deletions(-) diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs index 3c8d981..756cb6b 100644 --- a/rust/lance-graph/src/datafusion_planner/expression.rs +++ b/rust/lance-graph/src/datafusion_planner/expression.rs @@ -8,6 +8,8 @@ use crate::ast::{BooleanExpression, PropertyValue, ValueExpression}; use crate::datafusion_planner::udf; use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator}; +use datafusion::functions::string::lower; +use datafusion::functions::string::upper; use datafusion_functions_aggregate::average::avg; use datafusion_functions_aggregate::count::count; use datafusion_functions_aggregate::min_max::max; @@ -186,9 +188,29 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { lit(0) } } + // String functions + "tolower" | "lower" => { + if args.len() == 1 { + let arg_expr = to_df_value_expr(&args[0]); + lower().call(vec![arg_expr]) + } else { + // Invalid argument count - return NULL + Expr::Literal(datafusion::scalar::ScalarValue::Null, None) + } + } + "toupper" | "upper" => { + if args.len() == 1 { + let arg_expr = to_df_value_expr(&args[0]); + upper().call(vec![arg_expr]) + } else { + // Invalid argument count - return NULL + Expr::Literal(datafusion::scalar::ScalarValue::Null, None) + } + } _ => { - // Unsupported function - return placeholder for now - lit(0) + // Unsupported function - return NULL which coerces to any type + // This prevents type coercion errors in both string and numeric contexts + Expr::Literal(datafusion::scalar::ScalarValue::Null, None) } } } @@ -1255,4 +1277,128 @@ mod tests { let name = to_cypher_column_name(&expr); assert_eq!(name, "expr", "Arithmetic should use generic name"); } + + // ======================================================================== + // Unit tests for string functions (toLower, toUpper) + // ======================================================================== + + #[test] + fn test_value_expr_function_tolower() { + let expr = ValueExpression::Function { + name: "toLower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + // Should be a ScalarFunction with lower + assert!( + s.contains("lower") || s.contains("Lower"), + "Should use lower function, got: {}", + s + ); + assert!(s.contains("p__name"), "Should contain column reference"); + } + + #[test] + fn test_value_expr_function_toupper() { + let expr = ValueExpression::Function { + name: "toUpper".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + // Should be a ScalarFunction with upper + assert!( + s.contains("upper") || s.contains("Upper"), + "Should use upper function, got: {}", + s + ); + assert!(s.contains("p__name"), "Should contain column reference"); + } + + #[test] + fn test_value_expr_function_lower_alias() { + // Test that 'lower' also works (SQL-style alias) + let expr = ValueExpression::Function { + name: "lower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + assert!( + s.contains("lower") || s.contains("Lower"), + "Should use lower function, got: {}", + s + ); + } + + #[test] + fn test_value_expr_function_upper_alias() { + // Test that 'upper' also works (SQL-style alias) + let expr = ValueExpression::Function { + name: "upper".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + assert!( + s.contains("upper") || s.contains("Upper"), + "Should use upper function, got: {}", + s + ); + } + + #[test] + fn test_tolower_with_contains_produces_valid_like() { + // This is the bug scenario: toLower(s.name) CONTAINS 'offer' + // Previously returned lit(0) which caused type coercion error + let tolower_expr = ValueExpression::Function { + name: "toLower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "s".into(), + property: "name".into(), + })], + }; + + let contains_expr = BooleanExpression::Contains { + expression: tolower_expr, + substring: "offer".into(), + }; + + let df_expr = to_df_boolean_expr(&contains_expr); + let s = format!("{:?}", df_expr); + + // Should be a Like expression with lower() on the column, not lit(0) + assert!( + s.contains("Like"), + "Should be a LIKE expression, got: {}", + s + ); + assert!( + s.contains("lower") || s.contains("Lower"), + "Should contain lower function, got: {}", + s + ); + assert!( + !s.contains("Int32(0)") && !s.contains("Int64(0)") && !s.contains("Utf8(\"\")"), + "Should NOT contain literal 0 or empty string (placeholder bugs), got: {}", + s + ); + } } diff --git a/rust/lance-graph/tests/test_datafusion_pipeline.rs b/rust/lance-graph/tests/test_datafusion_pipeline.rs index 8ee7451..c51b9ff 100644 --- a/rust/lance-graph/tests/test_datafusion_pipeline.rs +++ b/rust/lance-graph/tests/test_datafusion_pipeline.rs @@ -4115,3 +4115,137 @@ async fn test_datafusion_contains_in_relationship_query() { ] ); } + +// ============================================================================ +// String Function Tests (toLower, toUpper) +// ============================================================================ + +#[tokio::test] +async fn test_tolower_with_contains() { + // This test verifies the fix for: toLower(p.name) CONTAINS 'ali' + // Previously this caused: "type_coercion: There isn't a common type to coerce Int32 and Utf8" + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + // Search for names containing 'ali' (case-insensitive via toLower) + let query = CypherQuery::new( + "MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'ali' RETURN p.name, p.age", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should find 'Alice' (toLower -> 'alice' contains 'ali') + assert_eq!(result.num_rows(), 1, "Expected 1 result for 'ali' search"); + + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "Alice"); +} + +#[tokio::test] +async fn test_toupper_with_contains() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + // Search for names containing 'BOB' (case-insensitive via toUpper) + let query = CypherQuery::new( + "MATCH (p:Person) WHERE toUpper(p.name) CONTAINS 'BOB' RETURN p.name", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should find 'Bob' + assert_eq!(result.num_rows(), 1); + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "Bob"); +} + +#[tokio::test] +async fn test_tolower_in_return_clause() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + // Return lowercased names + let query = CypherQuery::new("MATCH (p:Person) WHERE p.name = 'Alice' RETURN toLower(p.name)") + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + assert_eq!(result.num_rows(), 1); + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "alice"); +} + +#[tokio::test] +async fn test_tolower_with_integer_column_in_return() { + // This is the exact bug scenario: toLower(s.name) CONTAINS 'x' RETURN s.name, s.age + // The bug was that returning an integer column (age) alongside the toLower filter + // caused type coercion errors because unsupported functions returned lit(0) + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + let query = CypherQuery::new( + "MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'cha' RETURN p.name, p.age", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should find 'Charlie' (toLower -> 'charlie' contains 'cha') + assert_eq!(result.num_rows(), 1); + + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let ages = result + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(names.value(0), "Charlie"); + assert_eq!(ages.value(0), 30); +} From 5c2d8483df76d1aa7b4fb44cd18dfd069485ab7f Mon Sep 17 00:00:00 2001 From: Youssef Date: Mon, 12 Jan 2026 23:22:25 +0100 Subject: [PATCH 2/4] chore: remove redundant test section headers --- rust/lance-graph/src/datafusion_planner/expression.rs | 4 ---- rust/lance-graph/tests/test_datafusion_pipeline.rs | 4 ---- 2 files changed, 8 deletions(-) diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs index 756cb6b..bc41976 100644 --- a/rust/lance-graph/src/datafusion_planner/expression.rs +++ b/rust/lance-graph/src/datafusion_planner/expression.rs @@ -1278,10 +1278,6 @@ mod tests { assert_eq!(name, "expr", "Arithmetic should use generic name"); } - // ======================================================================== - // Unit tests for string functions (toLower, toUpper) - // ======================================================================== - #[test] fn test_value_expr_function_tolower() { let expr = ValueExpression::Function { diff --git a/rust/lance-graph/tests/test_datafusion_pipeline.rs b/rust/lance-graph/tests/test_datafusion_pipeline.rs index c51b9ff..cb3fc1e 100644 --- a/rust/lance-graph/tests/test_datafusion_pipeline.rs +++ b/rust/lance-graph/tests/test_datafusion_pipeline.rs @@ -4116,10 +4116,6 @@ async fn test_datafusion_contains_in_relationship_query() { ); } -// ============================================================================ -// String Function Tests (toLower, toUpper) -// ============================================================================ - #[tokio::test] async fn test_tolower_with_contains() { // This test verifies the fix for: toLower(p.name) CONTAINS 'ali' From ec80bb78f0155812206a56d52504dca85a78c7e4 Mon Sep 17 00:00:00 2001 From: Youssef Date: Mon, 12 Jan 2026 23:33:26 +0100 Subject: [PATCH 3/4] fix: move string function tests to correct section --- .../src/datafusion_planner/expression.rs | 240 +++++++++--------- 1 file changed, 120 insertions(+), 120 deletions(-) diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs index bc41976..78c8d12 100644 --- a/rust/lance-graph/src/datafusion_planner/expression.rs +++ b/rust/lance-graph/src/datafusion_planner/expression.rs @@ -1054,6 +1054,126 @@ mod tests { assert!(s.contains("p__amount"), "Should contain column reference"); } + #[test] + fn test_value_expr_function_tolower() { + let expr = ValueExpression::Function { + name: "toLower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + // Should be a ScalarFunction with lower + assert!( + s.contains("lower") || s.contains("Lower"), + "Should use lower function, got: {}", + s + ); + assert!(s.contains("p__name"), "Should contain column reference"); + } + + #[test] + fn test_value_expr_function_toupper() { + let expr = ValueExpression::Function { + name: "toUpper".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + // Should be a ScalarFunction with upper + assert!( + s.contains("upper") || s.contains("Upper"), + "Should use upper function, got: {}", + s + ); + assert!(s.contains("p__name"), "Should contain column reference"); + } + + #[test] + fn test_value_expr_function_lower_alias() { + // Test that 'lower' also works (SQL-style alias) + let expr = ValueExpression::Function { + name: "lower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + assert!( + s.contains("lower") || s.contains("Lower"), + "Should use lower function, got: {}", + s + ); + } + + #[test] + fn test_value_expr_function_upper_alias() { + // Test that 'upper' also works (SQL-style alias) + let expr = ValueExpression::Function { + name: "upper".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + assert!( + s.contains("upper") || s.contains("Upper"), + "Should use upper function, got: {}", + s + ); + } + + #[test] + fn test_tolower_with_contains_produces_valid_like() { + // This is the bug scenario: toLower(s.name) CONTAINS 'offer' + // Previously returned lit(0) which caused type coercion error + let tolower_expr = ValueExpression::Function { + name: "toLower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "s".into(), + property: "name".into(), + })], + }; + + let contains_expr = BooleanExpression::Contains { + expression: tolower_expr, + substring: "offer".into(), + }; + + let df_expr = to_df_boolean_expr(&contains_expr); + let s = format!("{:?}", df_expr); + + // Should be a Like expression with lower() on the column, not lit(0) + assert!( + s.contains("Like"), + "Should be a LIKE expression, got: {}", + s + ); + assert!( + s.contains("lower") || s.contains("Lower"), + "Should contain lower function, got: {}", + s + ); + assert!( + !s.contains("Int32(0)") && !s.contains("Int64(0)") && !s.contains("Utf8(\"\")"), + "Should NOT contain literal 0 or empty string (placeholder bugs), got: {}", + s + ); + } + // ======================================================================== // Unit tests for contains_aggregate() // ======================================================================== @@ -1277,124 +1397,4 @@ mod tests { let name = to_cypher_column_name(&expr); assert_eq!(name, "expr", "Arithmetic should use generic name"); } - - #[test] - fn test_value_expr_function_tolower() { - let expr = ValueExpression::Function { - name: "toLower".into(), - args: vec![ValueExpression::Property(PropertyRef { - variable: "p".into(), - property: "name".into(), - })], - }; - - let df_expr = to_df_value_expr(&expr); - let s = format!("{:?}", df_expr); - // Should be a ScalarFunction with lower - assert!( - s.contains("lower") || s.contains("Lower"), - "Should use lower function, got: {}", - s - ); - assert!(s.contains("p__name"), "Should contain column reference"); - } - - #[test] - fn test_value_expr_function_toupper() { - let expr = ValueExpression::Function { - name: "toUpper".into(), - args: vec![ValueExpression::Property(PropertyRef { - variable: "p".into(), - property: "name".into(), - })], - }; - - let df_expr = to_df_value_expr(&expr); - let s = format!("{:?}", df_expr); - // Should be a ScalarFunction with upper - assert!( - s.contains("upper") || s.contains("Upper"), - "Should use upper function, got: {}", - s - ); - assert!(s.contains("p__name"), "Should contain column reference"); - } - - #[test] - fn test_value_expr_function_lower_alias() { - // Test that 'lower' also works (SQL-style alias) - let expr = ValueExpression::Function { - name: "lower".into(), - args: vec![ValueExpression::Property(PropertyRef { - variable: "p".into(), - property: "name".into(), - })], - }; - - let df_expr = to_df_value_expr(&expr); - let s = format!("{:?}", df_expr); - assert!( - s.contains("lower") || s.contains("Lower"), - "Should use lower function, got: {}", - s - ); - } - - #[test] - fn test_value_expr_function_upper_alias() { - // Test that 'upper' also works (SQL-style alias) - let expr = ValueExpression::Function { - name: "upper".into(), - args: vec![ValueExpression::Property(PropertyRef { - variable: "p".into(), - property: "name".into(), - })], - }; - - let df_expr = to_df_value_expr(&expr); - let s = format!("{:?}", df_expr); - assert!( - s.contains("upper") || s.contains("Upper"), - "Should use upper function, got: {}", - s - ); - } - - #[test] - fn test_tolower_with_contains_produces_valid_like() { - // This is the bug scenario: toLower(s.name) CONTAINS 'offer' - // Previously returned lit(0) which caused type coercion error - let tolower_expr = ValueExpression::Function { - name: "toLower".into(), - args: vec![ValueExpression::Property(PropertyRef { - variable: "s".into(), - property: "name".into(), - })], - }; - - let contains_expr = BooleanExpression::Contains { - expression: tolower_expr, - substring: "offer".into(), - }; - - let df_expr = to_df_boolean_expr(&contains_expr); - let s = format!("{:?}", df_expr); - - // Should be a Like expression with lower() on the column, not lit(0) - assert!( - s.contains("Like"), - "Should be a LIKE expression, got: {}", - s - ); - assert!( - s.contains("lower") || s.contains("Lower"), - "Should contain lower function, got: {}", - s - ); - assert!( - !s.contains("Int32(0)") && !s.contains("Int64(0)") && !s.contains("Utf8(\"\")"), - "Should NOT contain literal 0 or empty string (placeholder bugs), got: {}", - s - ); - } } From 2ff66f03e59c60a949af38daa7ad4071a4a895ed Mon Sep 17 00:00:00 2001 From: Youssef Date: Tue, 13 Jan 2026 15:48:29 +0100 Subject: [PATCH 4/4] style: apply cargo fmt formatting Co-Authored-By: Claude --- rust/lance-graph/src/datafusion_planner/expression.rs | 2 +- rust/lance-graph/tests/test_datafusion_pipeline.rs | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs index 78c8d12..abd8ab3 100644 --- a/rust/lance-graph/src/datafusion_planner/expression.rs +++ b/rust/lance-graph/src/datafusion_planner/expression.rs @@ -7,9 +7,9 @@ use crate::ast::{BooleanExpression, PropertyValue, ValueExpression}; use crate::datafusion_planner::udf; -use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator}; use datafusion::functions::string::lower; use datafusion::functions::string::upper; +use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator}; use datafusion_functions_aggregate::average::avg; use datafusion_functions_aggregate::count::count; use datafusion_functions_aggregate::min_max::max; diff --git a/rust/lance-graph/tests/test_datafusion_pipeline.rs b/rust/lance-graph/tests/test_datafusion_pipeline.rs index cb3fc1e..9c1998a 100644 --- a/rust/lance-graph/tests/test_datafusion_pipeline.rs +++ b/rust/lance-graph/tests/test_datafusion_pipeline.rs @@ -4155,11 +4155,10 @@ async fn test_toupper_with_contains() { let person_batch = create_person_dataset(); // Search for names containing 'BOB' (case-insensitive via toUpper) - let query = CypherQuery::new( - "MATCH (p:Person) WHERE toUpper(p.name) CONTAINS 'BOB' RETURN p.name", - ) - .unwrap() - .with_config(config); + let query = + CypherQuery::new("MATCH (p:Person) WHERE toUpper(p.name) CONTAINS 'BOB' RETURN p.name") + .unwrap() + .with_config(config); let mut datasets = HashMap::new(); datasets.insert("Person".to_string(), person_batch);