Skip to content

Commit 79fcb5e

Browse files
committed
Binary search refactoring.
1 parent c1bca3f commit 79fcb5e

File tree

7 files changed

+340
-299
lines changed

7 files changed

+340
-299
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package com.dannemann.stringcompressor;
2+
3+
/**
 * <p>Base class for binary search objects that operate on a mass of compressed strings.</p>
 * <p>Holds the state shared by every search implementation: the compressed data, whether lookups are prefix
 * searches, and the character set used to compress the data. Concrete subclasses implement
 * {@link #search(byte[])}.</p>
 * <p>The arrays are stored and returned by reference (no defensive copies are made), so any modification made
 * to them by the caller is visible to this object.</p>
 */
public abstract class BaseBinarySearch {

    /** The mass of compressed strings to search through. Stored by reference. */
    protected final byte[][] compressedMass;

    /** If {@code true}, searches match on key prefix; otherwise they require an exact match. */
    protected final boolean prefixSearch;

    /** Character set used to compress {@link #compressedMass}. Stored by reference. */
    protected final byte[] charset;

    /**
     * Creates a binary search object.
     * @param compressedMass The mass of compressed strings to search through.
     * @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix.
     * @param charset Character set used to compress {@code compressedMass}.
     * @throws NullPointerException If {@code compressedMass} or {@code charset} is {@code null}.
     */
    public BaseBinarySearch(byte[][] compressedMass, boolean prefixSearch, byte[] charset) {
        // Fail fast: a null array would otherwise only surface later, inside a subclass's search loop.
        if (compressedMass == null)
            throw new NullPointerException("compressedMass");
        if (charset == null)
            throw new NullPointerException("charset");

        this.compressedMass = compressedMass;
        this.prefixSearch = prefixSearch;
        this.charset = charset;
    }

    /**
     * Searches the compressed mass for the specified key.
     * @param key The uncompressed key to search for, as a byte array.
     * @return The result defined by the concrete implementation. The subclasses in this package document it as
     * the index of the search key if found; otherwise {@code -(insertion point) - 1}.
     */
    public abstract int search(final byte[] key);

    /**
     * @return The compressed mass passed to the constructor (same array instance, not a copy).
     */
    public byte[][] getCompressedMass() {
        return compressedMass;
    }

    /**
     * @return {@code true} if this object was created for prefix searches.
     */
    public boolean isPrefixSearch() {
        return prefixSearch;
    }

    /**
     * @return The character set passed to the constructor (same array instance, not a copy).
     */
    public byte[] getCharset() {
        return charset;
    }

}
Lines changed: 31 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,57 @@
11
package com.dannemann.stringcompressor;
22

3-
import static com.dannemann.stringcompressor.AsciiCompressor.getBytes;
43
import static com.dannemann.stringcompressor.FiveBitAsciiCompressor.DEFAULT_5BIT_CHARSET;
54

65
/**
76
* <p>Performs binary search (including prefix search) on data compressed by {@link FiveBitAsciiCompressor}.
87
* Particularly useful when searching large amounts of compressed data stored in memory.</p>
98
* <p>The data must have been sorted prior to compression.</p>
9+
* <p>If {@code prefixSearch} is set to {@code true}, the method searches for an element whose prefix matches the
10+
* specified key. Otherwise, it searches for an exact match. If there are multiple elements with the same prefix, the
11+
* first matching element is returned.</p>
1012
* <p>Note that character ordering depends on the sequence defined in your custom charset (via {@code supportedCharset}),
1113
* which is passed to the compressor constructor (see {@link FiveBitAsciiCompressor#FiveBitAsciiCompressor(byte[])}).
1214
* If no custom charset is provided, compressors use a default charset ordered by ASCII.</p>
1315
* @author Jean Dannemann Carone
16+
* @see FiveBitAsciiCompressor#DEFAULT_5BIT_CHARSET
1417
*/
15-
public final class FiveBitBinarySearch {
18+
public final class FiveBitBinarySearch extends BaseBinarySearch {
19+
20+
/**
21+
* Creates a binary search object for data compressed with the default character set {@link FiveBitAsciiCompressor#DEFAULT_5BIT_CHARSET}.
22+
* @param compressedMass The mass of compressed strings to search through.
23+
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
24+
* @author Jean Dannemann Carone
25+
* @see FiveBitBinarySearch#FiveBitBinarySearch(byte[][], boolean, byte[])
26+
*/
27+
public FiveBitBinarySearch(byte[][] compressedMass, boolean prefixSearch) {
28+
super(compressedMass, prefixSearch, DEFAULT_5BIT_CHARSET);
29+
}
30+
31+
/**
32+
* Creates a binary search object.
33+
* @param compressedMass The mass of compressed strings to search through.
34+
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
35+
* @param charset Character set used to compress {@code compressedMass}.
36+
* @author Jean Dannemann Carone
37+
*/
38+
public FiveBitBinarySearch(byte[][] compressedMass, boolean prefixSearch, byte[] charset) {
39+
super(compressedMass, prefixSearch, charset);
40+
}
1641

1742
/**
1843
* <p>Performs a binary search on the provided compressed data array to locate the specified key.</p>
1944
* <p>The compressed data is expected to be produced by {@link FiveBitAsciiCompressor} and must be sorted before
2045
* compression for this search to work correctly. The search is performed directly on the compressed form without
2146
* decompressing the entire dataset, enabling fast lookups in large in-memory compressed collections.</p>
22-
* <p>If {@code prefixSearch} is set to {@code true}, the method searches for an element whose prefix matches the
23-
* specified key. Otherwise, it searches for an exact match. If there are multiple elements with the same prefix, the
24-
* first matching element is returned.</p>
2547
* <p>The method returns the index of the matching element if found; otherwise, it returns
2648
* {@code -(insertion point) - 1}, following the contract of {@link java.util.Arrays#binarySearch}.</p>
27-
* @param compressedMass The array of compressed byte array strings to search through.
2849
* @param key The uncompressed key to search for, as a byte array.
29-
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
3050
* @return The index of the search key if it is found; otherwise, {@code -(insertion point) - 1}.
3151
* @author Jean Dannemann Carone
3252
*/
33-
public static int search(final byte[][] compressedMass, final byte[] key, boolean prefixSearch) {
53+
@Override
54+
public int search(final byte[] key) {
3455
final int massLength = compressedMass.length;
3556

3657
if (massLength == 0)
@@ -53,9 +74,9 @@ public static int search(final byte[][] compressedMass, final byte[] key, boolea
5374
bits += 8;
5475

5576
if (bits >= 5 &&
56-
(cmp = DEFAULT_5BIT_CHARSET[buffer >>> (bits -= 5) & 0x1F] - key[j++]) != 0 ||
77+
(cmp = charset[buffer >>> (bits -= 5) & 0x1F] - key[j++]) != 0 ||
5778
bits >= 5 && j < keyLen &&
58-
(cmp = DEFAULT_5BIT_CHARSET[buffer >>> (bits -= 5) & 0x1F] - key[j++]) != 0)
79+
(cmp = charset[buffer >>> (bits -= 5) & 0x1F] - key[j++]) != 0)
5980
break;
6081
}
6182

@@ -79,32 +100,4 @@ else if (cmp > 0)
79100
return -(low + 1);
80101
}
81102

82-
/**
83-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
84-
*/
85-
public static int search(final byte[][] compressedMass, final byte[] key) {
86-
return search(compressedMass, key, false);
87-
}
88-
89-
/**
90-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
91-
*/
92-
public static int search(final byte[][] compressedMass, final String key) {
93-
return search(compressedMass, getBytes(key));
94-
}
95-
96-
/**
97-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
98-
*/
99-
public static int prefixSearch(final byte[][] compressedMass, final byte[] key) {
100-
return search(compressedMass, key, true);
101-
}
102-
103-
/**
104-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
105-
*/
106-
public static int prefixSearch(final byte[][] compressedMass, final String key) {
107-
return prefixSearch(compressedMass, getBytes(key));
108-
}
109-
110103
}

src/main/java/com/dannemann/stringcompressor/FourBitBinarySearch.java

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,57 @@
11
package com.dannemann.stringcompressor;
22

3-
import static com.dannemann.stringcompressor.AsciiCompressor.getBytes;
43
import static com.dannemann.stringcompressor.FourBitAsciiCompressor.DEFAULT_4BIT_CHARSET;
54

65
/**
76
* <p>Performs binary search (including prefix search) on data compressed by {@link FourBitAsciiCompressor}.
87
* Particularly useful when searching large amounts of compressed data stored in memory.</p>
98
* <p>The data must have been sorted prior to compression.</p>
9+
* <p>If {@code prefixSearch} is set to {@code true}, the method searches for an element whose prefix matches the
10+
* specified key. Otherwise, it searches for an exact match. If there are multiple elements with the same prefix, the
11+
* first matching element is returned.</p>
1012
* <p>Note that character ordering depends on the sequence defined in your custom charset (via {@code supportedCharset}),
1113
* which is passed to the compressor constructor (see {@link FourBitAsciiCompressor#FourBitAsciiCompressor(byte[])}).
1214
* If no custom charset is provided, compressors use a default charset ordered by ASCII.</p>
1315
* @author Jean Dannemann Carone
16+
* @see FourBitAsciiCompressor#DEFAULT_4BIT_CHARSET
1417
*/
15-
public final class FourBitBinarySearch {
18+
public final class FourBitBinarySearch extends BaseBinarySearch {
19+
20+
/**
21+
* Creates a binary search object for data compressed with the default character set {@link FourBitAsciiCompressor#DEFAULT_4BIT_CHARSET}.
22+
* @param compressedMass The mass of compressed strings to search through.
23+
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
24+
* @author Jean Dannemann Carone
25+
* @see FourBitBinarySearch#FourBitBinarySearch(byte[][], boolean, byte[])
26+
*/
27+
public FourBitBinarySearch(byte[][] compressedMass, boolean prefixSearch) {
28+
super(compressedMass, prefixSearch, DEFAULT_4BIT_CHARSET);
29+
}
30+
31+
/**
32+
* Creates a binary search object.
33+
* @param compressedMass The mass of compressed strings to search through.
34+
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
35+
* @param charset Character set used to compress {@code compressedMass}.
36+
* @author Jean Dannemann Carone
37+
*/
38+
public FourBitBinarySearch(byte[][] compressedMass, boolean prefixSearch, byte[] charset) {
39+
super(compressedMass, prefixSearch, charset);
40+
}
1641

1742
/**
1843
* <p>Performs a binary search on the provided compressed data array to locate the specified key.</p>
1944
* <p>The compressed data is expected to be produced by {@link FourBitAsciiCompressor} and must be sorted before
2045
* compression for this search to work correctly. The search is performed directly on the compressed form without
2146
* decompressing the entire dataset, enabling fast lookups in large in-memory compressed collections.</p>
22-
* <p>If {@code prefixSearch} is set to {@code true}, the method searches for an element whose prefix matches the
23-
* specified key. Otherwise, it searches for an exact match. If there are multiple elements with the same prefix, the
24-
* first matching element is returned.</p>
2547
* <p>The method returns the index of the matching element if found; otherwise, it returns
2648
* {@code -(insertion point) - 1}, following the contract of {@link java.util.Arrays#binarySearch}.</p>
27-
* @param compressedMass The array of compressed byte array strings to search through.
2849
* @param key The uncompressed key to search for, as a byte array.
29-
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
3050
* @return The index of the search key if it is found; otherwise, {@code -(insertion point) - 1}.
3151
* @author Jean Dannemann Carone
3252
*/
33-
public static int search(final byte[][] compressedMass, final byte[] key, boolean prefixSearch) {
53+
@Override
54+
public int search(final byte[] key) {
3455
final int massLength = compressedMass.length;
3556

3657
if (massLength == 0)
@@ -88,32 +109,4 @@ else if (cmp > 0)
88109
return -(low + 1);
89110
}
90111

91-
/**
92-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
93-
*/
94-
public static int search(final byte[][] compressedMass, final byte[] key) {
95-
return search(compressedMass, key, false);
96-
}
97-
98-
/**
99-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
100-
*/
101-
public static int search(final byte[][] compressedMass, final String key) {
102-
return search(compressedMass, getBytes(key));
103-
}
104-
105-
/**
106-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
107-
*/
108-
public static int prefixSearch(final byte[][] compressedMass, final byte[] key) {
109-
return search(compressedMass, key, true);
110-
}
111-
112-
/**
113-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
114-
*/
115-
public static int prefixSearch(final byte[][] compressedMass, final String key) {
116-
return prefixSearch(compressedMass, getBytes(key));
117-
}
118-
119112
}

src/main/java/com/dannemann/stringcompressor/SixBitBinarySearch.java

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,57 @@
11
package com.dannemann.stringcompressor;
22

3-
import static com.dannemann.stringcompressor.AsciiCompressor.getBytes;
43
import static com.dannemann.stringcompressor.SixBitAsciiCompressor.DEFAULT_6BIT_CHARSET;
54

65
/**
76
* <p>Performs binary search (including prefix search) on data compressed by {@link SixBitAsciiCompressor}.
87
* Particularly useful when searching large amounts of compressed data stored in memory.</p>
98
* <p>The data must have been sorted prior to compression.</p>
9+
* <p>If {@code prefixSearch} is set to {@code true}, the method searches for an element whose prefix matches the
10+
* specified key. Otherwise, it searches for an exact match. If there are multiple elements with the same prefix, the
11+
* first matching element is returned.</p>
1012
* <p>Note that character ordering depends on the sequence defined in your custom charset (via {@code supportedCharset}),
1113
* which is passed to the compressor constructor (see {@link SixBitAsciiCompressor#SixBitAsciiCompressor(byte[])}).
1214
* If no custom charset is provided, compressors use a default charset ordered by ASCII.</p>
1315
* @author Jean Dannemann Carone
16+
* @see SixBitAsciiCompressor#DEFAULT_6BIT_CHARSET
1417
*/
15-
public final class SixBitBinarySearch {
18+
public final class SixBitBinarySearch extends BaseBinarySearch {
19+
20+
/**
21+
* Creates a binary search object for data compressed with the default character set {@link SixBitAsciiCompressor#DEFAULT_6BIT_CHARSET}.
22+
* @param compressedMass The mass of compressed strings to search through.
23+
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
24+
* @author Jean Dannemann Carone
25+
* @see SixBitBinarySearch#SixBitBinarySearch(byte[][], boolean, byte[])
26+
*/
27+
public SixBitBinarySearch(byte[][] compressedMass, boolean prefixSearch) {
28+
super(compressedMass, prefixSearch, DEFAULT_6BIT_CHARSET);
29+
}
30+
31+
/**
32+
* Creates a binary search object.
33+
* @param compressedMass The mass of compressed strings to search through.
34+
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
35+
* @param charset Character set used to compress {@code compressedMass}.
36+
* @author Jean Dannemann Carone
37+
*/
38+
public SixBitBinarySearch(byte[][] compressedMass, boolean prefixSearch, byte[] charset) {
39+
super(compressedMass, prefixSearch, charset);
40+
}
1641

1742
/**
1843
* <p>Performs a binary search on the provided compressed data array to locate the specified key.</p>
1944
* <p>The compressed data is expected to be produced by {@link SixBitAsciiCompressor} and must be sorted before
2045
* compression for this search to work correctly. The search is performed directly on the compressed form without
2146
* decompressing the entire dataset, enabling fast lookups in large in-memory compressed collections.</p>
22-
* <p>If {@code prefixSearch} is set to {@code true}, the method searches for an element whose prefix matches the
23-
* specified key. Otherwise, it searches for an exact match. If there are multiple elements with the same prefix, the
24-
* first matching element is returned.</p>
2547
* <p>The method returns the index of the matching element if found; otherwise, it returns
2648
* {@code -(insertion point) - 1}, following the contract of {@link java.util.Arrays#binarySearch}.</p>
27-
* @param compressedMass The array of compressed byte array strings to search through.
2849
* @param key The uncompressed key to search for, as a byte array.
29-
* @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
3050
* @return The index of the search key if it is found; otherwise, {@code -(insertion point) - 1}.
3151
* @author Jean Dannemann Carone
3252
*/
33-
public static int search(final byte[][] compressedMass, final byte[] key, boolean prefixSearch) {
53+
@Override
54+
public int search(final byte[] key) {
3455
final int massLength = compressedMass.length;
3556

3657
if (massLength == 0)
@@ -79,32 +100,4 @@ else if (cmp > 0)
79100
return -(low + 1);
80101
}
81102

82-
/**
83-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
84-
*/
85-
public static int search(final byte[][] compressedMass, final byte[] key) {
86-
return search(compressedMass, key, false);
87-
}
88-
89-
/**
90-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
91-
*/
92-
public static int search(final byte[][] compressedMass, final String key) {
93-
return search(compressedMass, getBytes(key));
94-
}
95-
96-
/**
97-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
98-
*/
99-
public static int prefixSearch(final byte[][] compressedMass, final byte[] key) {
100-
return search(compressedMass, key, true);
101-
}
102-
103-
/**
104-
* Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
105-
*/
106-
public static int prefixSearch(final byte[][] compressedMass, final String key) {
107-
return prefixSearch(compressedMass, getBytes(key));
108-
}
109-
110103
}

0 commit comments

Comments
 (0)