Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
using BenchmarkDotNet.Attributes;
using System;
using System.Collections.Generic;
using System.Linq;
using Platform.Random;

namespace Platform.Collections.Benchmarks
{
/// <summary>
/// Performance comparison between HashSet and BitString intersection operations.
/// Every benchmark runs against the same pair of index sets over the range [0, N),
/// stored both as <see cref="BitString"/> instances and as <see cref="HashSet{T}"/> instances.
/// Tests various scenarios: sparse vs dense sets, different sizes and intersection rates.
/// </summary>
[SimpleJob]
[MemoryDiagnoser]
public class IntersectionPerformanceComparison
{
    /// <summary>
    /// Length passed to both BitString constructors and exclusive upper bound of the generated indices.
    /// </summary>
    [Params(1000, 10000, 100000, 1000000)]
    public int N { get; set; }

    /// <summary>
    /// Probability with which each index in [0, N) is put into the left set;
    /// also used to size and thin out the right-only elements (see <see cref="Setup"/>).
    /// </summary>
    [Params(0.1, 0.5, 0.9)]
    public double FillRate { get; set; }

    /// <summary>
    /// Fraction of the left set's elements that are also put into the right set.
    /// </summary>
    [Params(0.1, 0.3, 0.7)]
    public double IntersectionRate { get; set; }

    private BitString _bitStringLeft;
    private BitString _bitStringRight;
    private HashSet<int> _hashSetLeft;
    private HashSet<int> _hashSetRight;

    /// <summary>
    /// Builds the left and right operands for the current parameter combination.
    /// The BitString and HashSet variants of each side always receive the same indices.
    /// </summary>
    /// <remarks>
    /// <para>Left: each index in [0, N) is included when a random draw is below <see cref="FillRate"/>.</para>
    /// <para>
    /// Right: the first <c>(int)(leftCount * IntersectionRate)</c> left indices (in ascending order, so the
    /// shared elements are the lowest ones), plus up to <c>(int)(leftCount * FillRate) - intersectionSize</c>
    /// indices that are not in the left set.
    /// </para>
    /// </remarks>
    [GlobalSetup]
    public void Setup()
    {
        // NOTE(review): whether RandomHelpers.Default is seeded deterministically is not visible here;
        // if it is not, the generated data differs between runs - confirm.
        var random = RandomHelpers.Default;

        _bitStringLeft = new BitString(N);
        _bitStringRight = new BitString(N);
        _hashSetLeft = new HashSet<int>();
        _hashSetRight = new HashSet<int>();

        // Fill the left collections; leftBits keeps the chosen indices in ascending order.
        var leftBits = new List<int>();
        for (int i = 0; i < N; i++)
        {
            if (random.NextDouble() < FillRate)
            {
                _bitStringLeft.Set(i);
                _hashSetLeft.Add(i);
                leftBits.Add(i);
            }
        }

        var intersectionSize = (int)(leftBits.Count * IntersectionRate);

        // The raw difference is negative whenever IntersectionRate > FillRate; in that case no
        // right-only elements are added. The clamp only makes that explicit.
        var rightOnlySize = Math.Max(0, (int)(leftBits.Count * FillRate) - intersectionSize);

        // Shared elements: the lowest intersectionSize indices of the left set.
        foreach (var index in leftBits.Take(intersectionSize))
        {
            _bitStringRight.Set(index);
            _hashSetRight.Add(index);
        }

        // Right-only elements: scan upwards and keep each index that is absent from the left set
        // with probability FillRate until the target count is reached or the range is exhausted.
        int rightOnlyAdded = 0;
        for (int i = 0; i < N && rightOnlyAdded < rightOnlySize; i++)
        {
            // Membership is tested against the hash set (O(1)) rather than the list (O(count)),
            // which would make this loop quadratic. Both hold exactly the same indices.
            if (!_hashSetLeft.Contains(i) && random.NextDouble() < FillRate)
            {
                _bitStringRight.Set(i);
                _hashSetRight.Add(i);
                rightOnlyAdded++;
            }
        }
    }

    /// <summary>
    /// Baseline: copies the left hash set and calls IntersectWith with the right one.
    /// </summary>
    /// <returns>The copy after IntersectWith.</returns>
    [Benchmark(Baseline = true)]
    public HashSet<int> HashSetIntersection()
    {
        var result = new HashSet<int>(_hashSetLeft);
        result.IntersectWith(_hashSetRight);
        return result;
    }

    /// <summary>
    /// Copies the left BitString, applies And with the right one and collects the copy's set indices.
    /// </summary>
    /// <returns>The result of GetSetIndices() on the copy.</returns>
    [Benchmark]
    public List<long> BitStringIntersection()
    {
        var leftCopy = new BitString(_bitStringLeft);
        leftCopy.And(_bitStringRight);
        return leftCopy.GetSetIndices();
    }

    /// <summary>
    /// Same as <see cref="BitStringIntersection"/>, but uses VectorAnd.
    /// </summary>
    /// <returns>The result of GetSetIndices() on the copy.</returns>
    [Benchmark]
    public List<long> BitStringVectorIntersection()
    {
        var leftCopy = new BitString(_bitStringLeft);
        leftCopy.VectorAnd(_bitStringRight);
        return leftCopy.GetSetIndices();
    }

    /// <summary>
    /// Same as <see cref="BitStringIntersection"/>, but uses ParallelAnd.
    /// </summary>
    /// <returns>The result of GetSetIndices() on the copy.</returns>
    [Benchmark]
    public List<long> BitStringParallelIntersection()
    {
        var leftCopy = new BitString(_bitStringLeft);
        leftCopy.ParallelAnd(_bitStringRight);
        return leftCopy.GetSetIndices();
    }

    /// <summary>
    /// Same as <see cref="BitStringIntersection"/>, but uses ParallelVectorAnd.
    /// </summary>
    /// <returns>The result of GetSetIndices() on the copy.</returns>
    [Benchmark]
    public List<long> BitStringParallelVectorIntersection()
    {
        var leftCopy = new BitString(_bitStringLeft);
        leftCopy.ParallelVectorAnd(_bitStringRight);
        return leftCopy.GetSetIndices();
    }

    /// <summary>
    /// Calls GetCommonIndices on the left BitString with the right one, without making a copy first.
    /// </summary>
    /// <returns>The result of GetCommonIndices().</returns>
    [Benchmark]
    public List<long> BitStringGetCommonIndices()
    {
        return _bitStringLeft.GetCommonIndices(_bitStringRight);
    }

    /// <summary>
    /// Calls CountCommonBits on the left BitString with the right one.
    /// </summary>
    /// <returns>The result of CountCommonBits().</returns>
    [Benchmark]
    public long BitStringCountCommonBits()
    {
        return _bitStringLeft.CountCommonBits(_bitStringRight);
    }

    /// <summary>
    /// Calls HaveCommonBits on the left BitString with the right one.
    /// </summary>
    /// <returns>The result of HaveCommonBits().</returns>
    [Benchmark]
    public bool BitStringHaveCommonBits()
    {
        return _bitStringLeft.HaveCommonBits(_bitStringRight);
    }

    /// <summary>
    /// Counts the elements of the LINQ Intersect of the left and right hash sets.
    /// </summary>
    /// <returns>The number of elements produced by Intersect.</returns>
    [Benchmark]
    public int HashSetIntersectionCount()
    {
        return _hashSetLeft.Intersect(_hashSetRight).Count();
    }

    /// <summary>
    /// Calls Overlaps on the left hash set with the right one.
    /// </summary>
    /// <returns>The result of Overlaps().</returns>
    [Benchmark]
    public bool HashSetHaveCommon()
    {
        return _hashSetLeft.Overlaps(_hashSetRight);
    }
}
}
20 changes: 19 additions & 1 deletion csharp/Platform.Collections.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,27 @@
using BenchmarkDotNet.Running;
using System;

namespace Platform.Collections.Benchmarks
{
/// <summary>
/// Entry point of the benchmark project. The first command-line argument selects the suite to run.
/// </summary>
static class Program
{
    /// <summary>
    /// Runs the benchmark suite named by <c>args[0]</c>; prints usage when the argument is missing
    /// or is not one of the known suite names.
    /// </summary>
    /// <param name="args">Command-line arguments; only the first one is inspected.</param>
    // The class must declare exactly one entry point: keeping a parameterless Main() next to
    // this overload is rejected by the compiler (CS0017, more than one entry point).
    static void Main(string[] args)
    {
        var suite = args.Length > 0 ? args[0] : null;
        switch (suite)
        {
            case "intersection":
                BenchmarkRunner.Run<IntersectionPerformanceComparison>();
                break;
            case "bitstring":
                BenchmarkRunner.Run<BitStringBenchmarks>();
                break;
            default:
                Console.WriteLine("Usage: dotnet run [intersection|bitstring]");
                Console.WriteLine(" intersection - Run HashSet vs BitString intersection performance comparison");
                Console.WriteLine(" bitstring - Run BitString operation benchmarks");
                Console.WriteLine(" (no args) - Show this help");
                break;
        }
    }
}
}
135 changes: 135 additions & 0 deletions examples/intersection-performance-comparison.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# HashSet vs BitString Intersection Performance Comparison

This document describes the performance comparison benchmarks implemented to address [Issue #52](https://github.com/linksplatform/Collections/issues/52) - "Compare HashSet and BitString intersection performance".

## Implementation Overview

A comprehensive benchmark suite has been added to compare the intersection performance between `HashSet<int>` and `BitString` collections under various scenarios.

## Benchmark Class: `IntersectionPerformanceComparison`

Located in: `csharp/Platform.Collections.Benchmarks/IntersectionPerformanceComparison.cs`

### Test Parameters

The benchmark tests different combinations of:

- **Collection Size (N)**: 1,000, 10,000, 100,000, 1,000,000 elements
- **Fill Rate**: 0.1 (sparse), 0.5 (medium), 0.9 (dense)
- **Intersection Rate**: 0.1 (low overlap), 0.3 (medium overlap), 0.7 (high overlap)

### Benchmark Methods

#### HashSet Operations
- `HashSetIntersection()` - Standard HashSet.IntersectWith() method (baseline)
- `HashSetIntersectionCount()` - Count intersection elements using LINQ
- `HashSetHaveCommon()` - Check if sets overlap using Overlaps() method

#### BitString Operations
- `BitStringIntersection()` - BitString.And() with GetSetIndices()
- `BitStringVectorIntersection()` - BitString.VectorAnd() with GetSetIndices()
- `BitStringParallelIntersection()` - BitString.ParallelAnd() with GetSetIndices()
- `BitStringParallelVectorIntersection()` - BitString.ParallelVectorAnd() with GetSetIndices()
- `BitStringGetCommonIndices()` - Direct GetCommonIndices() method
- `BitStringCountCommonBits()` - Count intersection elements
- `BitStringHaveCommonBits()` - Check if bitstrings have common bits

## Running the Benchmarks

### Prerequisites

- .NET 8.0 SDK
- BenchmarkDotNet package (already included)

### Commands

```bash
# Navigate to the benchmark project
cd csharp/Platform.Collections.Benchmarks

# Run intersection performance comparison (BenchmarkDotNet requires an optimized Release build)
dotnet run -c Release -- intersection

# Run original BitString benchmarks
dotnet run -c Release -- bitstring

# Show help
dotnet run
```

### Sample Benchmark Execution

```bash
dotnet run --configuration Release -- intersection
```

This will run the full benchmark suite with all parameter combinations, testing:
- 4 collection sizes × 3 fill rates × 3 intersection rates = 36 parameter combinations
- 10 different intersection methods per combination
- Total: 360 individual benchmark runs

## Expected Results Analysis

### When BitString Should Perform Better
- **Dense collections** (high fill rate): BitString uses compact bit operations
- **Large collections**: Vectorized and parallel operations provide advantages
- **Simple existence checks**: `HaveCommonBits()` should be faster than `Overlaps()`
- **Counting operations**: Bit counting can be more efficient than enumeration

### When HashSet Should Perform Better
- **Sparse collections** (low fill rate): Less memory overhead and faster iteration
- **Small collections**: Lower setup overhead
- **Complex intersection results**: Direct set operations may be more efficient

### Performance Factors Tested

1. **Memory usage**: BitString has fixed memory based on max index, HashSet varies by element count
2. **CPU utilization**: BitString can leverage SIMD and parallelization
3. **Cache efficiency**: Compact bit representation vs pointer-based hash table
4. **Algorithmic complexity**: O(n) bit operations vs O(n+m) set operations

## Usage Examples

### Basic BitString Intersection
```csharp
var left = new BitString(1000);
var right = new BitString(1000);

// Setup bits...
left.Set(5); left.Set(10); left.Set(15);
right.Set(10); right.Set(15); right.Set(20);

// Intersection using And operation
var result = new BitString(left);
result.And(right);
var commonIndices = result.GetSetIndices(); // [10, 15]

// Or using direct method
var commonIndices2 = left.GetCommonIndices(right); // [10, 15]
```

### HashSet Intersection
```csharp
var left = new HashSet<int> { 5, 10, 15 };
var right = new HashSet<int> { 10, 15, 20 };

// Standard intersection
var result = new HashSet<int>(left);
result.IntersectWith(right); // {10, 15}

// Count intersection
int count = left.Intersect(right).Count(); // 2

// Check overlap
bool hasCommon = left.Overlaps(right); // true
```

## Key Insights

This benchmark suite enables empirical comparison of:
- Raw intersection performance across different data characteristics
- Memory efficiency for different sparsity patterns
- Scalability of parallel vs sequential approaches
- Effectiveness of hardware acceleration (SIMD) for bit operations

The results will help determine optimal collection choice based on specific use case requirements.
Loading