Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
- { package: uu_du }
- { package: uu_expand }
- { package: uu_fold }
- { package: uu_join }
- { package: uu_ls }
- { package: uu_mv }
- { package: uu_nl }
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions src/uu/join/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,12 @@ fluent = { workspace = true }
[[bin]]
name = "join"
path = "src/main.rs"

[dev-dependencies]
divan = { workspace = true }
tempfile = { workspace = true }
uucore = { workspace = true, features = ["benchmark"] }

[[bench]]
name = "join_bench"
harness = false
115 changes: 115 additions & 0 deletions src/uu/join/benches/join_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use divan::{Bencher, black_box};
use std::{fs::File, io::Write};
use tempfile::TempDir;
use uu_join::uumain;
use uucore::benchmark::run_util_function;

/// Create two sorted files with matching keys for join benchmarking.
///
/// Writes `file1.txt` and `file2.txt` inside `temp_dir`. Each file holds
/// `num_lines` space-separated records whose first field is the line index
/// zero-padded to 8 digits, so line `i` of one file pairs with line `i` of
/// the other.
///
/// Returns the two paths as owned strings in `(file1, file2)` order.
///
/// # Panics
///
/// Panics if a file cannot be created, written or flushed, or if a path is
/// not valid UTF-8.
fn create_join_files(temp_dir: &TempDir, num_lines: usize) -> (String, String) {
    let file1_path = temp_dir.path().join("file1.txt");
    let file2_path = temp_dir.path().join("file2.txt");

    // Buffer the output: writing to a bare `File` issues one write call per
    // `writeln!`, i.e. `2 * num_lines` tiny writes during fixture setup.
    let mut file1 = std::io::BufWriter::new(File::create(&file1_path).unwrap());
    let mut file2 = std::io::BufWriter::new(File::create(&file2_path).unwrap());

    for i in 0..num_lines {
        writeln!(file1, "{i:08} field1_{i} field2_{i}").unwrap();
        writeln!(file2, "{i:08} data1_{i} data2_{i}").unwrap();
    }

    // Flush explicitly so write errors surface here instead of being
    // swallowed when the `BufWriter`s are dropped.
    file1.flush().unwrap();
    file2.flush().unwrap();

    (
        file1_path.to_str().unwrap().to_string(),
        file2_path.to_str().unwrap().to_string(),
    )
}

/// Create two files with partial overlap for join benchmarking.
///
/// Writes `file1.txt` and `file2.txt` inside `temp_dir`, each with
/// `num_lines` records keyed by an index zero-padded to 8 digits:
///
/// * file 1 holds keys `0 ..= num_lines - 1`;
/// * file 2 holds keys `num_lines - overlap ..= 2 * num_lines - overlap - 1`,
///
/// where `overlap = num_lines * overlap_ratio`, truncated and clamped to
/// `0 ..= num_lines`. So `overlap` keys appear in both files.
///
/// Returns the two paths as owned strings in `(file1, file2)` order.
///
/// # Panics
///
/// Panics if a file cannot be created, written or flushed, or if a path is
/// not valid UTF-8.
fn create_partial_overlap_files(
    temp_dir: &TempDir,
    num_lines: usize,
    overlap_ratio: f64,
) -> (String, String) {
    let file1_path = temp_dir.path().join("file1.txt");
    let file2_path = temp_dir.path().join("file2.txt");

    // Buffer the output: writing to a bare `File` issues one write call per
    // `writeln!`.
    let mut file1 = std::io::BufWriter::new(File::create(&file1_path).unwrap());
    let mut file2 = std::io::BufWriter::new(File::create(&file2_path).unwrap());

    // The float-to-integer `as` cast saturates (negative or NaN ratios give
    // 0). The `min` also caps ratios above 1.0, so the subtraction that
    // computes `start` below can never underflow.
    let overlap_count = ((num_lines as f64 * overlap_ratio) as usize).min(num_lines);

    // File 1: keys 0 to num_lines-1
    for i in 0..num_lines {
        writeln!(file1, "{i:08} f1_data_{i}").unwrap();
    }

    // File 2: keys (num_lines - overlap_count) to (2*num_lines - overlap_count - 1)
    let start = num_lines - overlap_count;
    for i in 0..num_lines {
        writeln!(file2, "{:08} f2_data_{}", start + i, i).unwrap();
    }

    // Flush explicitly so write errors surface here instead of being
    // swallowed when the `BufWriter`s are dropped.
    file1.flush().unwrap();
    file2.flush().unwrap();

    (
        file1_path.to_str().unwrap().to_string(),
        file2_path.to_str().unwrap().to_string(),
    )
}

/// Benchmark a plain `join` of two files in which every key has a match.
#[divan::bench]
fn join_full_match(bencher: Bencher) {
    const LINE_COUNT: usize = 10_000;

    // Build the fixture once, outside the closure handed to the bencher.
    let workdir = TempDir::new().unwrap();
    let (left, right) = create_join_files(&workdir, LINE_COUNT);
    let args = [left.as_str(), right.as_str()];

    bencher.bench(|| {
        let outcome = run_util_function(uumain, &args);
        black_box(outcome);
    });
}

/// Benchmark `join` on two files that share only half of their keys
/// (overlap ratio 0.5).
#[divan::bench]
fn join_partial_overlap(bencher: Bencher) {
    const LINE_COUNT: usize = 10_000;
    const OVERLAP_RATIO: f64 = 0.5;

    // Build the fixture once, outside the closure handed to the bencher.
    let workdir = TempDir::new().unwrap();
    let (left, right) = create_partial_overlap_files(&workdir, LINE_COUNT, OVERLAP_RATIO);
    let args = [left.as_str(), right.as_str()];

    bencher.bench(|| {
        let outcome = run_util_function(uumain, &args);
        black_box(outcome);
    });
}

/// Benchmark join with custom field separator.
///
/// Generates two tab-separated files of 10000 records each, keyed by an
/// index zero-padded to 8 digits. It then runs `join -t '\t'` on them.
///
/// # Panics
///
/// Panics if the temporary directory or files cannot be created, written
/// or flushed, or if a path is not valid UTF-8.
#[divan::bench]
fn join_custom_separator(bencher: Bencher) {
    let num_lines = 10000;
    let temp_dir = TempDir::new().unwrap();
    let file1_path = temp_dir.path().join("file1.txt");
    let file2_path = temp_dir.path().join("file2.txt");

    // Scope the writers so both files are flushed and closed before the
    // benchmark starts reading them.
    {
        // Buffer the output: writing to a bare `File` issues one write call
        // per `writeln!`.
        let mut file1 = std::io::BufWriter::new(File::create(&file1_path).unwrap());
        let mut file2 = std::io::BufWriter::new(File::create(&file2_path).unwrap());

        for i in 0..num_lines {
            writeln!(file1, "{i:08}\tfield1_{i}\tfield2_{i}").unwrap();
            writeln!(file2, "{i:08}\tdata1_{i}\tdata2_{i}").unwrap();
        }

        // Flush explicitly so write errors surface here instead of being
        // swallowed when the `BufWriter`s are dropped.
        file1.flush().unwrap();
        file2.flush().unwrap();
    }

    let file1_str = file1_path.to_str().unwrap();
    let file2_str = file2_path.to_str().unwrap();

    bencher.bench(|| {
        black_box(run_util_function(
            uumain,
            &["-t", "\t", file1_str, file2_str],
        ));
    });
}

/// Entry point of the benchmark binary; hands control to `divan::main`.
fn main() {
    divan::main();
}
Loading