Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ under the License.
<!-- com.github plugins -->
<git-commit-id-plugin.version>4.9.10</git-commit-id-plugin.version>
<!-- org.apache.creadur plugins -->
<apache-rat-plugin.version>0.16.1</apache-rat-plugin.version>
<apache-rat-plugin.version>0.17</apache-rat-plugin.version>
<!-- org.eluder maven plugins -->
<coveralls-repo-token></coveralls-repo-token>
<coveralls-maven-plugin.version>4.3.0</coveralls-maven-plugin.version>
Expand Down Expand Up @@ -332,19 +332,20 @@ under the License.
<configuration>
<outputDirectory>${project.basedir}/rat</outputDirectory>
<consoleOutput>true</consoleOutput>
<useDefaultExcludes>true</useDefaultExcludes>
<excludes>
<!-- rat uses .gitignore for excludes by default -->
<exclude>**/*.yaml</exclude>
<exclude>**/*.yml</exclude>
<exclude>**/.*</exclude>
<exclude>**/test/resources/**/*.txt</exclude>
<exclude>**/git.properties</exclude>
<exclude>**/*.sk</exclude>
<exclude>LICENSE</exclude>
<exclude>NOTICE</exclude>
<exclude>**/*.code-workspace</exclude>
</excludes>
<inputExcludes>
<!-- I think rat uses .gitignore for excludes by default -->
<inputExcludeStd>StandardCollection</inputExcludeStd>
<inputExclude>**/*.yaml</inputExclude>
<inputExclude>**/*.yml</inputExclude>
<inputExclude>**/.*</inputExclude>
<inputExclude>**/test/resources/**/*.txt</inputExclude>
<inputExclude>**/git.properties</inputExclude>
<inputExclude>**/doc/**</inputExclude>
<inputExclude>**/*.sk</inputExclude>
<inputExclude>LICENSE</inputExclude>
<inputExclude>NOTICE</inputExclude>
<inputExclude>**/*.code-workspace</inputExclude>
</inputExcludes>
</configuration>
</plugin>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
* @author Jon Malkin
*/
public class ArrayOfBooleansSerDe extends ArrayOfItemsSerDe<Boolean> {

/**
* No argument constructor.
*/
public ArrayOfBooleansSerDe() { }

/**
* Computes number of bytes needed for packed bit encoding of the array of booleans. Rounds
* partial bytes up to return a whole number of bytes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
*/
public class ArrayOfDoublesSerDe extends ArrayOfItemsSerDe<Double> {

/**
* No argument constructor.
*/
public ArrayOfDoublesSerDe() { }

@Override
public byte[] serializeToByteArray(final Double item) {
Objects.requireNonNull(item, "Item must not be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
*/
public abstract class ArrayOfItemsSerDe<T> {

/**
* No argument constructor.
*/
public ArrayOfItemsSerDe() { }

/**
* Serialize a single unserialized item to a byte array.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
*/
public class ArrayOfLongsSerDe extends ArrayOfItemsSerDe<Long> {

/**
* No argument constructor.
*/
public ArrayOfLongsSerDe() { }

@Override
public byte[] serializeToByteArray(final Long item) {
Objects.requireNonNull(item, "Item must not be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
* @author Jon Malkin
*/
public class ArrayOfNumbersSerDe extends ArrayOfItemsSerDe<Number> {

// values selected to enable backwards compatibility
private static final byte LONG_INDICATOR = 12;
private static final byte INTEGER_INDICATOR = 9;
Expand All @@ -58,6 +57,11 @@ public class ArrayOfNumbersSerDe extends ArrayOfItemsSerDe<Number> {
private static final byte DOUBLE_INDICATOR = 4;
private static final byte FLOAT_INDICATOR = 6;

/**
* No argument constructor.
*/
public ArrayOfNumbersSerDe() { }

@Override
public byte[] serializeToByteArray(final Number item) {
Objects.requireNonNull(item, "Item must not be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
*/
public class ArrayOfStringsSerDe extends ArrayOfItemsSerDe<String> {

/**
* No argument constructor.
*/
public ArrayOfStringsSerDe() { }

@Override
public byte[] serializeToByteArray(final String item) {
Objects.requireNonNull(item, "Item must not be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@
*/
public class ArrayOfUtf16StringsSerDe extends ArrayOfItemsSerDe<String> {

/**
* No argument constructor.
*/
public ArrayOfUtf16StringsSerDe() { }

@Override
public byte[] serializeToByteArray(final String item) {
Objects.requireNonNull(item, "Item must not be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
*/
public final class ByteArrayUtil {

/**
* No argument constructor.
*/
public ByteArrayUtil() { }

/**
* Copies bytes from source to target with offsets on both the source and target.
* @param source the given source
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,14 @@ default void requestClose(final MemorySegment prevSeg) {
}

/**
* This class implements the defaults
* A convenience class that provides the default implementation of this interface.
*/
public static class Default implements MemorySegmentRequest {
//A convenience class that creates the target for the static member DEFAULT.

/**
* No argument constructor that creates the target for the static member DEFAULT.
*/
public Default() { }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@
package org.apache.datasketches.common;

import java.lang.foreign.Arena;

import java.lang.foreign.MemorySegment;
import java.util.Enumeration;
import java.util.concurrent.ConcurrentHashMap;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;

/**
* This is an example of a possible implementation of the MemorySegmentRequest interface
Expand All @@ -35,6 +34,11 @@
public final class MemorySegmentRequestExample implements MemorySegmentRequest {
private final ConcurrentHashMap<MemorySegment, Arena> map = new ConcurrentHashMap<>();

/**
* No argument constructor.
*/
public MemorySegmentRequestExample() { }

/**
* Request a new off-heap MemorySegment with the given <i>newByteSize</i>.
* An internal confined Arena is created to exclusively manage the new segment and it is associated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

/**
* Position operation violation.
*
* @author Lee Rhodes
*/
public class PositionInvariantsException extends RuntimeException {
private static final long serialVersionUID = 1L;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public final PositionalImpl setStartPositionEnd(final long start, final long pos
* @param start the lowest start position
* @param pos the current position
* @param end the highest position
* @param cap the capacity of the backing buffer.
* @param cap the capacity of the backing resource.
*/
private static final void checkInvariants(final long start, final long pos, final long end,
final long cap) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import java.lang.foreign.MemorySegment;

/**
* Defines the read-only API for relative positional access to a resource.
* Defines the API for relative positional access to a MemorySegment.
*
* @author Lee Rhodes
*/
Expand Down Expand Up @@ -259,7 +259,7 @@ void getShortArray(
int dstOffsetShorts,
int lengthShorts);

//PRIMITIVE putX() and putXArray()
//PRIMITIVE setX() and setXArray()

/**
* Sets the boolean value at the current position.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

/**
* Implementation of PositionalSegment
*
* @author Lee Rhodes
*/
final class PositionalSegmentImpl extends PositionalImpl implements PositionalSegment {
private static final byte SHORT_SHIFT = 1;
Expand Down Expand Up @@ -200,7 +202,7 @@ public void getShortArray(final short[] dstArray, final int dstOffsetShorts, fin
incrementPosition(lengthShorts << SHORT_SHIFT);
}

//PRIMITIVE putX() and putXArray() implementations
//PRIMITIVE setX() and setXArray() implementations

@Override
public void setBoolean(final boolean value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,11 @@
* under the License.
*/

/**
* This package provides the classes that define and implement a relative positional API.
*
* @see org.apache.datasketches.common.positional.Positional
*
* @author Lee Rhodes
*/
package org.apache.datasketches.common.positional;
20 changes: 20 additions & 0 deletions src/main/java/org/apache/datasketches/count/package-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,24 @@
* under the License.
*/

/**
* This package is intended for implementations of the Count Sketch and the Count-Min Sketch, both of which can be used to estimate
* frequency-moments of a stream of distinct elements. They are different from the unique counting sketches (HLL, Theta, CPC, etc.)
* and different from the Frequent-Items Sketch.
*
* <p>A Count-Min sketch is a probabilistic data structure that estimates the frequency of items in a large data stream using a
* fixed amount of memory. It uses a 2D array of counters and multiple hash functions to map each item to a position in every row.
* To estimate an item's frequency, it takes the minimum count from all the positions it hashes to, which provides an overestimate
* that is guaranteed to be greater than or equal to the true count.</p>
*
* <p>A Count sketch is a type of dimensionality reduction that is particularly efficient in statistics, machine learning and
* algorithms. It was invented by Moses Charikar, Kevin Chen and Martin Farach-Colton in an effort to speed up the AMS Sketch by
* Alon, Matias and Szegedy for approximating the frequency moments of streams (these calculations require counting of the
* number of occurrences for the distinct elements of the stream).</p>
*
* @see <a href="https://en.wikipedia.org/wiki/Count_sketch">https://en.wikipedia.org/wiki/Count_sketch</a>
* @see <a href="https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch">https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch</a>
*
*/

package org.apache.datasketches.count;
5 changes: 5 additions & 0 deletions src/main/java/org/apache/datasketches/cpc/TestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
*/
public class TestUtil {

/**
* No argument constructor.
*/
public TestUtil() { }

static final double pwrLaw10NextDouble(final int ppb, final double curPoint) {
final double cur = (curPoint < 1.0) ? 1.0 : curPoint;
double gi = round(Math.log10(cur) * ppb); //current generating index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@
*/
public final class BloomFilterBuilder {

/**
* No argument constructor.
*/
public BloomFilterBuilder() { }

/**
* Returns the optimal number of hash functions for the given target number of distinct items
* and the BloomFilter size in bits. This function will provide a result even if the input
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@
import org.apache.datasketches.common.SketchesStateException;

/**
* This sketch is useful for tracking approximate frequencies of items of type <i>&lt;T&gt;</i>
* This sketch is based on the paper <a href="https://arxiv.org/abs/1705.07001">https://arxiv.org/abs/1705.07001</a>
* ("A High-Performance Algorithm for Identifying Frequent Items in Data Streams"
* by Daniel Anderson, Pryce Bevan, Kevin Lang, Edo Liberty, Lee Rhodes, and Justin Thaler)
* and is useful for tracking approximate frequencies of items of type <i>&lt;T&gt;</i>
* with optional associated counts (<i>&lt;T&gt;</i> item, <i>long</i> count) that are members of a
* multiset of such items. The true frequency of an item is defined to be the sum of associated
* counts.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@
import org.apache.datasketches.common.SuppressFBWarnings;

/**
* This sketch is useful for tracking approximate frequencies of <i>long</i> items with optional
* This sketch is based on the paper <a href="https://arxiv.org/abs/1705.07001">https://arxiv.org/abs/1705.07001</a>
* ("A High-Performance Algorithm for Identifying Frequent Items in Data Streams"
* by Daniel Anderson, Pryce Bevan, Kevin Lang, Edo Liberty, Lee Rhodes, and Justin Thaler)
* and is useful for tracking approximate frequencies of <i>long</i> items with optional
* associated counts (<i>long</i> item, <i>long</i> count) that are members of a multiset of
* such items. The true frequency of an item is defined to be the sum of associated counts.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@
* under the License.
*/

/**
* This package contains the implementations of the paper <a href="https://arxiv.org/abs/1705.07001">https://arxiv.org/abs/1705.07001</a>.
*/
package org.apache.datasketches.frequencies;
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,16 @@
* <p>This implementation produces exactly the same hash result as the
* MurmurHash3 function in datasketches-java given compatible inputs.</p>
*
* <p>This FFM21 version of the implementation leverages the java.lang.foreign package (FFM) of JDK-21 in place of
* <p>This FFM version of the implementation leverages the java.lang.foreign package (FFM) of JDK-25 in place of
* the Unsafe class.
*
* @author Lee Rhodes
*/
public final class MurmurHash3FFM21 {
public final class MurmurHash3FFM {
private static final long C1 = 0x87c37b91114253d5L;
private static final long C2 = 0x4cf5ad432745937fL;

private MurmurHash3FFM21() { }
private MurmurHash3FFM() { }

/**
* Returns a 128-bit hash of the input.
Expand Down Expand Up @@ -199,7 +199,7 @@ public static long[] hash(final MemorySegment seg, final long offsetBytes, final
final long k1 = seg.get(JAVA_LONG_UNALIGNED, cumOff); //0, 16, 32, ...
final long k2 = seg.get(JAVA_LONG_UNALIGNED, cumOff + 8); //8, 24, 40, ...

synchronized (MurmurHash3FFM21.class) {
synchronized (MurmurHash3FFM.class) {
cumOff += 16L;
rem -= 16L;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import org.apache.datasketches.quantilescommon.DoublesSketchSortedView;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.datasketches.quantilescommon.QuantilesDoublesAPI;
import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator;
import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIteratorAPI;

/**
* This variation of the KllSketch implements primitive doubles.
Expand Down Expand Up @@ -266,7 +266,7 @@ public double[] getRanks(final double[] quantiles, final QuantileSearchCriteria
}

@Override
public QuantilesDoublesSketchIterator iterator() {
public QuantilesDoublesSketchIteratorAPI iterator() {
return new KllDoublesSketchIterator(
getDoubleItemsArray(), getLevelsArray(SketchStructure.UPDATABLE), getNumLevels());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@

package org.apache.datasketches.kll;

import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator;
import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIteratorAPI;

/**
* Iterator over KllDoublesSketch. The order is not defined.
*/
public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator {
public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIteratorAPI {
private final double[] quantiles;

KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) {
Expand Down
Loading
Loading