diff --git a/rust/lance-graph/src/datafusion_planner/expression.rs b/rust/lance-graph/src/datafusion_planner/expression.rs index 3c8d981..abd8ab3 100644 --- a/rust/lance-graph/src/datafusion_planner/expression.rs +++ b/rust/lance-graph/src/datafusion_planner/expression.rs @@ -7,6 +7,8 @@ use crate::ast::{BooleanExpression, PropertyValue, ValueExpression}; use crate::datafusion_planner::udf; +use datafusion::functions::string::lower; +use datafusion::functions::string::upper; use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator}; use datafusion_functions_aggregate::average::avg; use datafusion_functions_aggregate::count::count; @@ -186,9 +188,29 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { lit(0) } } + // String functions + "tolower" | "lower" => { + if args.len() == 1 { + let arg_expr = to_df_value_expr(&args[0]); + lower().call(vec![arg_expr]) + } else { + // Invalid argument count - return NULL + Expr::Literal(datafusion::scalar::ScalarValue::Null, None) + } + } + "toupper" | "upper" => { + if args.len() == 1 { + let arg_expr = to_df_value_expr(&args[0]); + upper().call(vec![arg_expr]) + } else { + // Invalid argument count - return NULL + Expr::Literal(datafusion::scalar::ScalarValue::Null, None) + } + } _ => { - // Unsupported function - return placeholder for now - lit(0) + // Unsupported function - return NULL which coerces to any type + // This prevents type coercion errors in both string and numeric contexts + Expr::Literal(datafusion::scalar::ScalarValue::Null, None) } } } @@ -1032,6 +1054,126 @@ mod tests { assert!(s.contains("p__amount"), "Should contain column reference"); } + #[test] + fn test_value_expr_function_tolower() { + let expr = ValueExpression::Function { + name: "toLower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + // Should be a ScalarFunction with lower + assert!( + s.contains("lower") || s.contains("Lower"), + "Should use lower function, got: {}", + s + ); + assert!(s.contains("p__name"), "Should contain column reference"); + } + + #[test] + fn test_value_expr_function_toupper() { + let expr = ValueExpression::Function { + name: "toUpper".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + // Should be a ScalarFunction with upper + assert!( + s.contains("upper") || s.contains("Upper"), + "Should use upper function, got: {}", + s + ); + assert!(s.contains("p__name"), "Should contain column reference"); + } + + #[test] + fn test_value_expr_function_lower_alias() { + // Test that 'lower' also works (SQL-style alias) + let expr = ValueExpression::Function { + name: "lower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + assert!( + s.contains("lower") || s.contains("Lower"), + "Should use lower function, got: {}", + s + ); + } + + #[test] + fn test_value_expr_function_upper_alias() { + // Test that 'upper' also works (SQL-style alias) + let expr = ValueExpression::Function { + name: "upper".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "p".into(), + property: "name".into(), + })], + }; + + let df_expr = to_df_value_expr(&expr); + let s = format!("{:?}", df_expr); + assert!( + s.contains("upper") || s.contains("Upper"), + "Should use upper function, got: {}", + s + ); + } + + #[test] + fn test_tolower_with_contains_produces_valid_like() { + // This is the bug scenario: toLower(s.name) CONTAINS 'offer' + // Previously returned lit(0) which caused type coercion error + let tolower_expr = ValueExpression::Function { + name: "toLower".into(), + args: vec![ValueExpression::Property(PropertyRef { + variable: "s".into(), + property: "name".into(), + })], + }; + + let contains_expr = BooleanExpression::Contains { + expression: tolower_expr, + substring: "offer".into(), + }; + + let df_expr = to_df_boolean_expr(&contains_expr); + let s = format!("{:?}", df_expr); + + // Should be a Like expression with lower() on the column, not lit(0) + assert!( + s.contains("Like"), + "Should be a LIKE expression, got: {}", + s + ); + assert!( + s.contains("lower") || s.contains("Lower"), + "Should contain lower function, got: {}", + s + ); + assert!( + !s.contains("Int32(0)") && !s.contains("Int64(0)") && !s.contains("Utf8(\"\")"), + "Should NOT contain literal 0 or empty string (placeholder bugs), got: {}", + s + ); + } + // ======================================================================== // Unit tests for contains_aggregate() // ======================================================================== diff --git a/rust/lance-graph/tests/test_datafusion_pipeline.rs b/rust/lance-graph/tests/test_datafusion_pipeline.rs index 8ee7451..9c1998a 100644 --- a/rust/lance-graph/tests/test_datafusion_pipeline.rs +++ b/rust/lance-graph/tests/test_datafusion_pipeline.rs @@ -4115,3 +4115,132 @@ async fn test_datafusion_contains_in_relationship_query() { ] ); } + +#[tokio::test] +async fn test_tolower_with_contains() { + // This test verifies the fix for: toLower(p.name) CONTAINS 'ali' + // Previously this caused: "type_coercion: There isn't a common type to coerce Int32 and Utf8" + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + // Search for names containing 'ali' (case-insensitive via toLower) + let query = CypherQuery::new( + "MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'ali' RETURN p.name, p.age", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should find 'Alice' (toLower -> 'alice' contains 'ali') + assert_eq!(result.num_rows(), 1, "Expected 1 result for 'ali' search"); + + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "Alice"); +} + +#[tokio::test] +async fn test_toupper_with_contains() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + // Search for names containing 'BOB' (case-insensitive via toUpper) + let query = + CypherQuery::new("MATCH (p:Person) WHERE toUpper(p.name) CONTAINS 'BOB' RETURN p.name") + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should find 'Bob' + assert_eq!(result.num_rows(), 1); + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "Bob"); +} + +#[tokio::test] +async fn test_tolower_in_return_clause() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + // Return lowercased names + let query = CypherQuery::new("MATCH (p:Person) WHERE p.name = 'Alice' RETURN toLower(p.name)") + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + assert_eq!(result.num_rows(), 1); + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "alice"); +} + +#[tokio::test] +async fn test_tolower_with_integer_column_in_return() { + // This is the exact bug scenario: toLower(s.name) CONTAINS 'x' RETURN s.name, s.age + // The bug was that returning an integer column (age) alongside the toLower filter + // caused type coercion errors because unsupported functions returned lit(0) + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + let query = CypherQuery::new( + "MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'cha' RETURN p.name, p.age", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should find 'Charlie' (toLower -> 'charlie' contains 'cha') + assert_eq!(result.num_rows(), 1); + + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let ages = result + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(names.value(0), "Charlie"); + assert_eq!(ages.value(0), 30); +}