diff --git a/csharp/Platform.Collections.Benchmarks/IntersectionPerformanceComparison.cs b/csharp/Platform.Collections.Benchmarks/IntersectionPerformanceComparison.cs
new file mode 100644
index 00000000..84347540
--- /dev/null
+++ b/csharp/Platform.Collections.Benchmarks/IntersectionPerformanceComparison.cs
@@ -0,0 +1,190 @@
+using BenchmarkDotNet.Attributes;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Platform.Random;
+
+namespace Platform.Collections.Benchmarks
+{
+    /// <summary>
+    /// Performance comparison between HashSet and BitString intersection operations.
+    /// Tests various scenarios: sparse vs dense sets, different sizes and intersection rates.
+    /// </summary>
+    /// <remarks>
+    /// The input data is generated from a fixed seed, so every benchmark method (each of which
+    /// gets its own <see cref="Setup"/> call) measures the same pair of collections.
+    /// </remarks>
+    [SimpleJob]
+    [MemoryDiagnoser]
+    public class IntersectionPerformanceComparison
+    {
+        // Fixed seed: keeps the generated data identical across benchmark methods and runs.
+        private const int Seed = 42;
+
+        /// <summary>Size of the index universe; elements are drawn from [0, N).</summary>
+        [Params(1000, 10000, 100000, 1000000)]
+        public int N { get; set; }
+
+        /// <summary>Probability with which each index is placed into the left collection.</summary>
+        [Params(0.1, 0.5, 0.9)]
+        public double FillRate { get; set; }
+
+        /// <summary>Fraction of the left collection's elements that are also placed into the right collection.</summary>
+        [Params(0.1, 0.3, 0.7)]
+        public double IntersectionRate { get; set; }
+
+        private BitString _bitStringLeft;
+        private BitString _bitStringRight;
+        // NOTE(review): element type int assumed from the int loop index stored in these sets - confirm.
+        private HashSet<int> _hashSetLeft;
+        private HashSet<int> _hashSetRight;
+
+        /// <summary>
+        /// Builds the left and right collections: every index is stored both in a BitString
+        /// and in the matching HashSet so that all benchmarks operate on equivalent data.
+        /// </summary>
+        [GlobalSetup]
+        public void Setup()
+        {
+            // Qualified on purpose: inside the Platform namespace a bare "Random" binds to the
+            // Platform.Random namespace rather than to the System.Random type.
+            var random = new System.Random(Seed);
+
+            _bitStringLeft = new BitString(N);
+            _bitStringRight = new BitString(N);
+            _hashSetLeft = new HashSet<int>();
+            _hashSetRight = new HashSet<int>();
+
+            // Split [0, N) into indices that belong to the left collection and indices that do not.
+            // Tracking the complement here avoids any membership lookup when the right side is built.
+            var leftIndices = new List<int>();
+            var freeIndices = new List<int>();
+            for (int i = 0; i < N; i++)
+            {
+                if (random.NextDouble() < FillRate)
+                {
+                    _bitStringLeft.Set(i);
+                    _hashSetLeft.Add(i);
+                    leftIndices.Add(i);
+                }
+                else
+                {
+                    freeIndices.Add(i);
+                }
+            }
+
+            // The right collection aims for the same size as the left one: IntersectionRate of the
+            // left elements are shared, the remainder comes from indices absent from the left
+            // collection (capped by how many such indices exist).
+            var intersectionSize = (int)(leftIndices.Count * IntersectionRate);
+            var rightOnlySize = System.Math.Min(leftIndices.Count - intersectionSize, freeIndices.Count);
+
+            AddRandomSampleToRight(leftIndices, intersectionSize, random);
+            AddRandomSampleToRight(freeIndices, rightOnlySize, random);
+        }
+
+        /// <summary>
+        /// Adds <paramref name="count"/> distinct, uniformly chosen elements of
+        /// <paramref name="candidates"/> to both right-hand collections.
+        /// </summary>
+        /// <param name="candidates">Pool to sample from; its element order is modified.</param>
+        /// <param name="count">Number of elements to add; capped at the pool size, non-positive values add nothing.</param>
+        /// <param name="random">Random source used for sampling.</param>
+        private void AddRandomSampleToRight(List<int> candidates, int count, System.Random random)
+        {
+            count = System.Math.Min(count, candidates.Count);
+            // Partial Fisher-Yates shuffle: after step i, candidates[i] is a uniformly chosen
+            // element that has not been picked before.
+            for (int i = 0; i < count; i++)
+            {
+                int j = random.Next(i, candidates.Count);
+                (candidates[i], candidates[j]) = (candidates[j], candidates[i]);
+                _bitStringRight.Set(candidates[i]);
+                _hashSetRight.Add(candidates[i]);
+            }
+        }
+
+        /// <summary>Baseline: copies the left HashSet and intersects the copy with the right one.</summary>
+        [Benchmark(Baseline = true)]
+        public HashSet<int> HashSetIntersection()
+        {
+            var result = new HashSet<int>(_hashSetLeft);
+            result.IntersectWith(_hashSetRight);
+            return result;
+        }
+
+        // NOTE(review): List element type assumed to be long (BitString counts are returned as
+        // long below) - confirm against BitString.GetSetIndices and BitString.GetCommonIndices.
+
+        /// <summary>Copies the left BitString, applies And with the right one and collects the set indices.</summary>
+        [Benchmark]
+        public List<long> BitStringIntersection()
+        {
+            var leftCopy = new BitString(_bitStringLeft);
+            leftCopy.And(_bitStringRight);
+            return leftCopy.GetSetIndices();
+        }
+
+        /// <summary>Same as <see cref="BitStringIntersection"/>, but combines the bits with VectorAnd.</summary>
+        [Benchmark]
+        public List<long> BitStringVectorIntersection()
+        {
+            var leftCopy = new BitString(_bitStringLeft);
+            leftCopy.VectorAnd(_bitStringRight);
+            return leftCopy.GetSetIndices();
+        }
+
+        /// <summary>Same as <see cref="BitStringIntersection"/>, but combines the bits with ParallelAnd.</summary>
+        [Benchmark]
+        public List<long> BitStringParallelIntersection()
+        {
+            var leftCopy = new BitString(_bitStringLeft);
+            leftCopy.ParallelAnd(_bitStringRight);
+            return leftCopy.GetSetIndices();
+        }
+
+        /// <summary>Same as <see cref="BitStringIntersection"/>, but combines the bits with ParallelVectorAnd.</summary>
+        [Benchmark]
+        public List<long> BitStringParallelVectorIntersection()
+        {
+            var leftCopy = new BitString(_bitStringLeft);
+            leftCopy.ParallelVectorAnd(_bitStringRight);
+            return leftCopy.GetSetIndices();
+        }
+
+        /// <summary>Calls GetCommonIndices on the left BitString directly, without making a copy first.</summary>
+        [Benchmark]
+        public List<long> BitStringGetCommonIndices()
+        {
+            return _bitStringLeft.GetCommonIndices(_bitStringRight);
+        }
+
+        /// <summary>Returns the result of CountCommonBits for the two BitStrings.</summary>
+        [Benchmark]
+        public long BitStringCountCommonBits()
+        {
+            return _bitStringLeft.CountCommonBits(_bitStringRight);
+        }
+
+        /// <summary>Returns the result of HaveCommonBits for the two BitStrings.</summary>
+        [Benchmark]
+        public bool BitStringHaveCommonBits()
+        {
+            return _bitStringLeft.HaveCommonBits(_bitStringRight);
+        }
+
+        /// <summary>Counts the common elements of the two HashSets with LINQ Intersect and Count.</summary>
+        [Benchmark]
+        public int HashSetIntersectionCount()
+        {
+            return _hashSetLeft.Intersect(_hashSetRight).Count();
+        }
+
+        /// <summary>Checks with Overlaps whether the two HashSets share at least one element.</summary>
+        [Benchmark]
+        public bool HashSetHaveCommon()
+        {
+            return _hashSetLeft.Overlaps(_hashSetRight);
+        }
+    }
+}
\ No newline at end of file
diff --git a/csharp/Platform.Collections.Benchmarks/Program.cs b/csharp/Platform.Collections.Benchmarks/Program.cs
index 
cbde7630..da59196f 100644
--- a/csharp/Platform.Collections.Benchmarks/Program.cs
+++ b/csharp/Platform.Collections.Benchmarks/Program.cs
@@ -1,9 +1,35 @@
 using BenchmarkDotNet.Running;
+using System;
 
 namespace Platform.Collections.Benchmarks
 {
     static class Program
     {
-        static void Main() => BenchmarkRunner.Run<BitStringBenchmarks>();
+        /// <summary>
+        /// Runs the benchmark suite selected by the first command-line argument,
+        /// or prints usage help when no known suite is named.
+        /// </summary>
+        /// <param name="args">Command-line arguments; only the first one is inspected.</param>
+        static void Main(string[] args)
+        {
+            // A missing first argument is mapped to null and lands in the default (help) branch.
+            switch (args.Length > 0 ? args[0] : null)
+            {
+                case "intersection":
+                    BenchmarkRunner.Run<IntersectionPerformanceComparison>();
+                    break;
+                case "bitstring":
+                    // NOTE(review): the benchmark class name is not visible in this change;
+                    // BitStringBenchmarks is assumed - confirm before merging.
+                    BenchmarkRunner.Run<BitStringBenchmarks>();
+                    break;
+                default:
+                    Console.WriteLine("Usage: dotnet run [intersection|bitstring]");
+                    Console.WriteLine(" intersection - Run HashSet vs BitString intersection performance comparison");
+                    Console.WriteLine(" bitstring - Run BitString operation benchmarks");
+                    Console.WriteLine(" (no args) - Show this help");
+                    break;
+            }
+        }
     }
 }
diff --git a/examples/intersection-performance-comparison.md b/examples/intersection-performance-comparison.md
new file mode 100644
index 00000000..78381650
--- /dev/null
+++ b/examples/intersection-performance-comparison.md
@@ -0,0 +1,135 @@
+# HashSet vs BitString Intersection Performance Comparison
+
+This document describes the performance comparison benchmarks implemented to address [Issue #52](https://github.com/linksplatform/Collections/issues/52) - "Compare HashSet and BitString intersection performance".
+
+## Implementation Overview
+
+A comprehensive benchmark suite has been added to compare the intersection performance between `HashSet` and `BitString` collections under various scenarios.
+ +## Benchmark Class: `IntersectionPerformanceComparison` + +Located in: `csharp/Platform.Collections.Benchmarks/IntersectionPerformanceComparison.cs` + +### Test Parameters + +The benchmark tests different combinations of: + +- **Collection Size (N)**: 1,000, 10,000, 100,000, 1,000,000 elements +- **Fill Rate**: 0.1 (sparse), 0.5 (medium), 0.9 (dense) +- **Intersection Rate**: 0.1 (low overlap), 0.3 (medium overlap), 0.7 (high overlap) + +### Benchmark Methods + +#### HashSet Operations +- `HashSetIntersection()` - Standard HashSet.IntersectWith() method (baseline) +- `HashSetIntersectionCount()` - Count intersection elements using LINQ +- `HashSetHaveCommon()` - Check if sets overlap using Overlaps() method + +#### BitString Operations +- `BitStringIntersection()` - BitString.And() with GetSetIndices() +- `BitStringVectorIntersection()` - BitString.VectorAnd() with GetSetIndices() +- `BitStringParallelIntersection()` - BitString.ParallelAnd() with GetSetIndices() +- `BitStringParallelVectorIntersection()` - BitString.ParallelVectorAnd() with GetSetIndices() +- `BitStringGetCommonIndices()` - Direct GetCommonIndices() method +- `BitStringCountCommonBits()` - Count intersection elements +- `BitStringHaveCommonBits()` - Check if bitstrings have common bits + +## Running the Benchmarks + +### Prerequisites + +- .NET 8.0 SDK +- BenchmarkDotNet package (already included) + +### Commands + +```bash +# Navigate to the benchmark project +cd csharp/Platform.Collections.Benchmarks + +# Run intersection performance comparison +dotnet run intersection + +# Run original BitString benchmarks +dotnet run bitstring + +# Show help +dotnet run +``` + +### Sample Benchmark Execution + +```bash +dotnet run --configuration Release -- intersection +``` + +This will run the full benchmark suite with all parameter combinations, testing: +- 4 collection sizes × 3 fill rates × 3 intersection rates = 36 parameter combinations +- 10 different benchmark methods per combination +- Total: 
360 individual benchmark runs + +## Expected Results Analysis + +### When BitString Should Perform Better +- **Dense collections** (high fill rate): BitString uses compact bit operations +- **Large collections**: Vectorized and parallel operations provide advantages +- **Simple existence checks**: `HaveCommonBits()` vs `Overlaps()` should be faster +- **Counting operations**: Bit counting can be more efficient than enumeration + +### When HashSet Should Perform Better +- **Sparse collections** (low fill rate): Less memory overhead and faster iteration +- **Small collections**: Lower setup overhead +- **Complex intersection results**: Direct set operations may be more efficient + +### Performance Factors Tested + +1. **Memory usage**: BitString has fixed memory based on max index, HashSet varies by element count +2. **CPU utilization**: BitString can leverage SIMD and parallelization +3. **Cache efficiency**: Compact bit representation vs pointer-based hash table +4. **Algorithmic complexity**: O(n) bit operations vs O(n+m) set operations + +## Usage Examples + +### Basic BitString Intersection +```csharp +var left = new BitString(1000); +var right = new BitString(1000); + +// Setup bits... 
+left.Set(5); left.Set(10); left.Set(15); +right.Set(10); right.Set(15); right.Set(20); + +// Intersection using And operation +var result = new BitString(left); +result.And(right); +var commonIndices = result.GetSetIndices(); // [10, 15] + +// Or using direct method +var commonIndices2 = left.GetCommonIndices(right); // [10, 15] +``` + +### HashSet Intersection +```csharp +var left = new HashSet<int> { 5, 10, 15 }; +var right = new HashSet<int> { 10, 15, 20 }; + +// Standard intersection +var result = new HashSet<int>(left); +result.IntersectWith(right); // {10, 15} + +// Count intersection +int count = left.Intersect(right).Count(); // 2 + +// Check overlap +bool hasCommon = left.Overlaps(right); // true +``` + +## Key Insights + +This benchmark suite enables empirical comparison of: +- Raw intersection performance across different data characteristics +- Memory efficiency for different sparsity patterns +- Scalability of parallel vs sequential approaches +- Effectiveness of hardware acceleration (SIMD) for bit operations + +The results will help determine optimal collection choice based on specific use case requirements. \ No newline at end of file