11package com .dannemann .stringcompressor ;
22
3- import static com .dannemann .stringcompressor .AsciiCompressor .getBytes ;
43import static com .dannemann .stringcompressor .FiveBitAsciiCompressor .DEFAULT_5BIT_CHARSET ;
54
65/**
76 * <p>Performs binary search (including prefix search) on data compressed by {@link FiveBitAsciiCompressor}.
87 * Particularly useful when searching large amounts of compressed data stored in memory.</p>
98 * <p>The data must have been sorted prior to compression.</p>
9+ * <p>If {@code prefixSearch} is set to {@code true}, {@link #search(byte[])} looks for an element whose prefix matches
10+ * the specified key; otherwise, it looks for an exact match. If multiple elements share the same prefix, the
11+ * first matching element is returned.</p>
1012 * <p>Note that character ordering depends on the sequence defined in your custom charset (via {@code supportedCharset}),
1113 * which is passed to the compressor constructor (see {@link FiveBitAsciiCompressor#FiveBitAsciiCompressor(byte[])}).
1214 * If no custom charset is provided, compressors use a default charset ordered by ASCII.</p>
1315 * @author Jean Dannemann Carone
16+ * @see FiveBitAsciiCompressor#DEFAULT_5BIT_CHARSET
1417 */
15- public final class FiveBitBinarySearch {
18+ public final class FiveBitBinarySearch extends BaseBinarySearch {
19+
20+ /**
21+ * Creates a binary search object for data compressed with the default character set {@link FiveBitAsciiCompressor#DEFAULT_5BIT_CHARSET}.
22+ * @param compressedMass The mass of compressed strings to search through.
23+ * @param prefixSearch If {@code true}, searches for an element that starts with the provided key instead of an exact match (the prefix must be unique).
24+ * @author Jean Dannemann Carone
25+ * @see FiveBitBinarySearch#FiveBitBinarySearch(byte[][], boolean, byte[])
26+ */
27+ public FiveBitBinarySearch (byte [][] compressedMass , boolean prefixSearch ) {
28+ super (compressedMass , prefixSearch , DEFAULT_5BIT_CHARSET );
29+ }
30+
31+ /**
32+ * Creates a binary search object for data compressed with the given character set.
33+ * @param compressedMass The mass of compressed strings to search through.
34+ * @param prefixSearch If {@code true}, searches for an element that starts with the provided key instead of an exact match (the prefix must be unique).
35+ * @param charset Character set used to compress {@code compressedMass}.
36+ * @author Jean Dannemann Carone
37+ */
38+ public FiveBitBinarySearch (byte [][] compressedMass , boolean prefixSearch , byte [] charset ) {
39+ super (compressedMass , prefixSearch , charset );
40+ }
1641
1742 /**
1843 * <p>Performs a binary search on the compressed data supplied at construction to locate the specified key.</p>
1944 * <p>The compressed data is expected to be produced by {@link FiveBitAsciiCompressor} and must be sorted before
2045 * compression for this search to work correctly. The search is performed directly on the compressed form without
2146 * decompressing the entire dataset, enabling fast lookups in large in-memory compressed collections.</p>
22- * <p>If {@code prefixSearch} is set to {@code true}, the method searches for an element whose prefix matches the
23- * specified key. Otherwise, it searches for an exact match. If there are multiple elements with the same prefix, the
24- * first matching element is returned.</p>
2547 * <p>The method returns the index of the matching element if found; otherwise, it returns
2648 * {@code -(insertion point) - 1}, following the contract of {@link java.util.Arrays#binarySearch}.</p>
27- * @param compressedMass The array of compressed byte array strings to search through.
2849 * @param key The uncompressed key to search for, as a byte array.
29- * @param prefixSearch If {@code true}, searches for elements starting with the provided key prefix (must be unique).
3050 * @return The index of the search key if it is found; otherwise, {@code -(insertion point) - 1}.
3151 * @author Jean Dannemann Carone
3252 */
33- public static int search (final byte [][] compressedMass , final byte [] key , boolean prefixSearch ) {
53+ @ Override
54+ public int search (final byte [] key ) {
3455 final int massLength = compressedMass .length ;
3556
3657 if (massLength == 0 )
@@ -53,9 +74,9 @@ public static int search(final byte[][] compressedMass, final byte[] key, boolea
5374 bits += 8 ;
5475
5576 if (bits >= 5 &&
56- (cmp = DEFAULT_5BIT_CHARSET [buffer >>> (bits -= 5 ) & 0x1F ] - key [j ++]) != 0 ||
77+ (cmp = charset [buffer >>> (bits -= 5 ) & 0x1F ] - key [j ++]) != 0 ||
5778 bits >= 5 && j < keyLen &&
58- (cmp = DEFAULT_5BIT_CHARSET [buffer >>> (bits -= 5 ) & 0x1F ] - key [j ++]) != 0 )
79+ (cmp = charset [buffer >>> (bits -= 5 ) & 0x1F ] - key [j ++]) != 0 )
5980 break ;
6081 }
6182
@@ -79,32 +100,4 @@ else if (cmp > 0)
79100 return -(low + 1 );
80101 }
81102
82- /**
83- * Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
84- */
85- public static int search (final byte [][] compressedMass , final byte [] key ) {
86- return search (compressedMass , key , false );
87- }
88-
89- /**
90- * Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = false}.
91- */
92- public static int search (final byte [][] compressedMass , final String key ) {
93- return search (compressedMass , getBytes (key ));
94- }
95-
96- /**
97- * Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
98- */
99- public static int prefixSearch (final byte [][] compressedMass , final byte [] key ) {
100- return search (compressedMass , key , true );
101- }
102-
103- /**
104- * Overloaded version of {@link #search(byte[][], byte[], boolean)} where parameter {@code prefixSearch = true}.
105- */
106- public static int prefixSearch (final byte [][] compressedMass , final String key ) {
107- return prefixSearch (compressedMass , getBytes (key ));
108- }
109-
110103}
0 commit comments