Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 144 additions & 2 deletions rust/lance-graph/src/datafusion_planner/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

use crate::ast::{BooleanExpression, PropertyValue, ValueExpression};
use crate::datafusion_planner::udf;
use datafusion::functions::string::lower;
use datafusion::functions::string::upper;
use datafusion::logical_expr::{col, lit, BinaryExpr, Expr, Operator};
use datafusion_functions_aggregate::average::avg;
use datafusion_functions_aggregate::count::count;
Expand Down Expand Up @@ -186,9 +188,29 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr {
lit(0)
}
}
// String functions
"tolower" | "lower" => {
if args.len() == 1 {
let arg_expr = to_df_value_expr(&args[0]);
lower().call(vec![arg_expr])
} else {
// Invalid argument count - return NULL
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
}
}
"toupper" | "upper" => {
if args.len() == 1 {
let arg_expr = to_df_value_expr(&args[0]);
upper().call(vec![arg_expr])
} else {
// Invalid argument count - return NULL
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
}
}
_ => {
// Unsupported function - return placeholder for now
lit(0)
// Unsupported function - return NULL which coerces to any type
// This prevents type coercion errors in both string and numeric contexts
Expr::Literal(datafusion::scalar::ScalarValue::Null, None)
}
}
}
Expand Down Expand Up @@ -1032,6 +1054,126 @@ mod tests {
assert!(s.contains("p__amount"), "Should contain column reference");
}

#[test]
fn test_value_expr_function_tolower() {
let expr = ValueExpression::Function {
name: "toLower".into(),
args: vec![ValueExpression::Property(PropertyRef {
variable: "p".into(),
property: "name".into(),
})],
};

let df_expr = to_df_value_expr(&expr);
let s = format!("{:?}", df_expr);
// Should be a ScalarFunction with lower
assert!(
s.contains("lower") || s.contains("Lower"),
"Should use lower function, got: {}",
s
);
assert!(s.contains("p__name"), "Should contain column reference");
}

#[test]
fn test_value_expr_function_toupper() {
let expr = ValueExpression::Function {
name: "toUpper".into(),
args: vec![ValueExpression::Property(PropertyRef {
variable: "p".into(),
property: "name".into(),
})],
};

let df_expr = to_df_value_expr(&expr);
let s = format!("{:?}", df_expr);
// Should be a ScalarFunction with upper
assert!(
s.contains("upper") || s.contains("Upper"),
"Should use upper function, got: {}",
s
);
assert!(s.contains("p__name"), "Should contain column reference");
}

#[test]
fn test_value_expr_function_lower_alias() {
// Test that 'lower' also works (SQL-style alias)
let expr = ValueExpression::Function {
name: "lower".into(),
args: vec![ValueExpression::Property(PropertyRef {
variable: "p".into(),
property: "name".into(),
})],
};

let df_expr = to_df_value_expr(&expr);
let s = format!("{:?}", df_expr);
assert!(
s.contains("lower") || s.contains("Lower"),
"Should use lower function, got: {}",
s
);
}

#[test]
fn test_value_expr_function_upper_alias() {
// Test that 'upper' also works (SQL-style alias)
let expr = ValueExpression::Function {
name: "upper".into(),
args: vec![ValueExpression::Property(PropertyRef {
variable: "p".into(),
property: "name".into(),
})],
};

let df_expr = to_df_value_expr(&expr);
let s = format!("{:?}", df_expr);
assert!(
s.contains("upper") || s.contains("Upper"),
"Should use upper function, got: {}",
s
);
}

#[test]
fn test_tolower_with_contains_produces_valid_like() {
// This is the bug scenario: toLower(s.name) CONTAINS 'offer'
// Previously returned lit(0) which caused type coercion error
let tolower_expr = ValueExpression::Function {
name: "toLower".into(),
args: vec![ValueExpression::Property(PropertyRef {
variable: "s".into(),
property: "name".into(),
})],
};

let contains_expr = BooleanExpression::Contains {
expression: tolower_expr,
substring: "offer".into(),
};

let df_expr = to_df_boolean_expr(&contains_expr);
let s = format!("{:?}", df_expr);

// Should be a Like expression with lower() on the column, not lit(0)
assert!(
s.contains("Like"),
"Should be a LIKE expression, got: {}",
s
);
assert!(
s.contains("lower") || s.contains("Lower"),
"Should contain lower function, got: {}",
s
);
assert!(
!s.contains("Int32(0)") && !s.contains("Int64(0)") && !s.contains("Utf8(\"\")"),
"Should NOT contain literal 0 or empty string (placeholder bugs), got: {}",
s
);
}

// ========================================================================
// Unit tests for contains_aggregate()
// ========================================================================
Expand Down
129 changes: 129 additions & 0 deletions rust/lance-graph/tests/test_datafusion_pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4115,3 +4115,132 @@ async fn test_datafusion_contains_in_relationship_query() {
]
);
}

#[tokio::test]
async fn test_tolower_with_contains() {
// This test verifies the fix for: toLower(p.name) CONTAINS 'ali'
// Previously this caused: "type_coercion: There isn't a common type to coerce Int32 and Utf8"
let config = create_graph_config();
let person_batch = create_person_dataset();

// Search for names containing 'ali' (case-insensitive via toLower)
let query = CypherQuery::new(
"MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'ali' RETURN p.name, p.age",
)
.unwrap()
.with_config(config);

let mut datasets = HashMap::new();
datasets.insert("Person".to_string(), person_batch);

let result = query
.execute(datasets, Some(ExecutionStrategy::DataFusion))
.await
.unwrap();

// Should find 'Alice' (toLower -> 'alice' contains 'ali')
assert_eq!(result.num_rows(), 1, "Expected 1 result for 'ali' search");

let names = result
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
assert_eq!(names.value(0), "Alice");
}

#[tokio::test]
async fn test_toupper_with_contains() {
let config = create_graph_config();
let person_batch = create_person_dataset();

// Search for names containing 'BOB' (case-insensitive via toUpper)
let query =
CypherQuery::new("MATCH (p:Person) WHERE toUpper(p.name) CONTAINS 'BOB' RETURN p.name")
.unwrap()
.with_config(config);

let mut datasets = HashMap::new();
datasets.insert("Person".to_string(), person_batch);

let result = query
.execute(datasets, Some(ExecutionStrategy::DataFusion))
.await
.unwrap();

// Should find 'Bob'
assert_eq!(result.num_rows(), 1);
let names = result
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
assert_eq!(names.value(0), "Bob");
}

#[tokio::test]
async fn test_tolower_in_return_clause() {
let config = create_graph_config();
let person_batch = create_person_dataset();

// Return lowercased names
let query = CypherQuery::new("MATCH (p:Person) WHERE p.name = 'Alice' RETURN toLower(p.name)")
.unwrap()
.with_config(config);

let mut datasets = HashMap::new();
datasets.insert("Person".to_string(), person_batch);

let result = query
.execute(datasets, Some(ExecutionStrategy::DataFusion))
.await
.unwrap();

assert_eq!(result.num_rows(), 1);
let names = result
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
assert_eq!(names.value(0), "alice");
}

#[tokio::test]
async fn test_tolower_with_integer_column_in_return() {
// This is the exact bug scenario: toLower(s.name) CONTAINS 'x' RETURN s.name, s.age
// The bug was that returning an integer column (age) alongside the toLower filter
// caused type coercion errors because unsupported functions returned lit(0)
let config = create_graph_config();
let person_batch = create_person_dataset();

let query = CypherQuery::new(
"MATCH (p:Person) WHERE toLower(p.name) CONTAINS 'cha' RETURN p.name, p.age",
)
.unwrap()
.with_config(config);

let mut datasets = HashMap::new();
datasets.insert("Person".to_string(), person_batch);

let result = query
.execute(datasets, Some(ExecutionStrategy::DataFusion))
.await
.unwrap();

// Should find 'Charlie' (toLower -> 'charlie' contains 'cha')
assert_eq!(result.num_rows(), 1);

let names = result
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let ages = result
.column(1)
.as_any()
.downcast_ref::<Int64Array>()
.unwrap();

assert_eq!(names.value(0), "Charlie");
assert_eq!(ages.value(0), 30);
}
Loading