Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions tdigest/include/tdigest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,16 +257,29 @@ class tdigest {
*/
static tdigest deserialize(const void* bytes, size_t size, const Allocator& allocator = Allocator());

class const_iterator;

/**
* Iterator pointing to the first centroid in the sketch.
* If the sketch is empty, the returned iterator must not be dereferenced or incremented.
* The iterator is built from a compressed copy of this sketch, so buffered values are
* reflected in the centroids it visits while the sketch itself is not modified.
* It owns that snapshot of centroids: every call pays for the copy.
* @return iterator pointing to the first centroid in the sketch
*/
const_iterator begin() const;

/**
* Iterator pointing to the past-the-end centroid in the sketch.
* It does not point to any centroid, and must not be dereferenced or incremented.
* Like begin(), it is built from a compressed copy of this sketch, so every call pays
* for that copy; prefer obtaining it once instead of calling end() in a loop condition.
* @return iterator pointing to the past-the-end centroid in the sketch
*/
const_iterator end() const;
private:
bool reverse_merge_;
uint16_t k_;
uint16_t internal_k_;
T min_;
T max_;
size_t centroids_capacity_;
vector_centroid centroids_;
uint64_t centroids_weight_;
size_t buffer_capacity_;
vector_t buffer_;

static const size_t BUFFER_MULTIPLIER = 4;
Expand Down Expand Up @@ -297,6 +310,27 @@ class tdigest {
static inline void check_split_points(const T* values, uint32_t size);
};

/**
 * Read-only input iterator over the centroids of a tdigest.
 * Dereferencing yields a pair whose first element is the centroid mean and whose
 * second element is the centroid weight.
 * Each iterator stores its own vector of centroids plus an index into it, so copying
 * an iterator copies that vector as well.
 */
template<typename T, typename A>
class tdigest<T, A>::const_iterator {
public:
using iterator_category = std::input_iterator_tag;
// first: centroid mean, second: centroid weight
using value_type = std::pair<const T&, const W>;
using difference_type = void;
// operator* returns a temporary pair, so operator-> cannot hand out a raw pointer;
// presumably return_value_holder keeps the pair alive for the member access -- confirm
using pointer = const return_value_holder<value_type>;
// elements are produced by value, not by reference
using reference = const value_type;

// pre-increment: advance to the next centroid
const_iterator& operator++();
// post-increment
// NOTE(review): declared to return a reference, but the out-of-class definition returns a
// local copy; the conventional signature returns const_iterator by value. Changing it
// requires updating the declaration and the definition together.
const_iterator& operator++(int);
// iterators compare equal when they are at the same index; the centroid vectors are not compared
bool operator==(const const_iterator& other) const;
bool operator!=(const const_iterator& other) const;
// must not be called on a past-the-end iterator
reference operator*() const;
pointer operator->() const;
private:
// tdigest needs access to the private constructor from begin() and end()
friend class tdigest;
// position of the current centroid within centroids_
uint32_t index_;
// centroids owned by this iterator
vector_centroid centroids_;
// is_end selects between the first position (false) and the past-the-end position (true)
const_iterator(const tdigest& tdigest_, bool is_end);
};
} /* namespace datasketches */

#include "tdigest_impl.hpp"
Expand Down
59 changes: 59 additions & 0 deletions tdigest/include/tdigest_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,65 @@ void tdigest<T, A>::check_split_points(const T* values, uint32_t size) {
}
}

// Builds an iterator positioned at the first centroid.
// The iterator constructor takes the sketch by const reference, so this sketch is not modified.
template <typename T, typename A>
auto tdigest<T, A>::begin() const -> const_iterator {
  const bool is_end = false;
  return const_iterator(*this, is_end);
}

// Builds the past-the-end iterator; it marks the end of iteration and must not be dereferenced.
template <typename T, typename A>
auto tdigest<T, A>::end() const -> const_iterator {
  const bool is_end = true;
  return const_iterator(*this, is_end);
}

// Creates an iterator over the centroids of the given sketch.
// @param tdigest_ sketch to iterate over; it is only read, never modified
// @param is_end if true the iterator is positioned past the last centroid,
//        otherwise at the first centroid
template<typename T, typename A>
tdigest<T, A>::const_iterator::const_iterator(const tdigest& tdigest_, const bool is_end):
index_(0),
centroids_(tdigest_.get_allocator())
{
  // Work on a copy so that buffered values can be merged into centroids
  // without touching the (const) source sketch
  tdigest tmp(tdigest_);
  tmp.compress();

  if (is_end) {
    // The past-the-end iterator is only ever compared by index and must not be
    // dereferenced, so the number of centroids is enough: skip copying them
    index_ = static_cast<uint32_t>(tmp.centroids_.size());
  } else {
    centroids_.insert(centroids_.end(), tmp.centroids_.begin(), tmp.centroids_.end());
  }
}

// Pre-increment: moves to the next centroid and returns this iterator.
// No bounds check is performed here.
template<typename T, typename A>
auto tdigest<T, A>::const_iterator::operator++() -> const_iterator& {
  index_ += 1;
  return *this;
}

// Post-increment: copies the iterator (including the centroids it owns), advances this
// iterator and returns the copy taken before the advance.
// NOTE(review): the return type is a reference, but `tmp` is a local object that is
// destroyed when the function returns, so any use of the returned reference is undefined
// behavior. The usual fix is to return const_iterator by value, which requires changing
// the declaration in the class and this definition together.
template<typename T, typename A>
typename tdigest<T, A>::const_iterator& tdigest<T, A>::const_iterator::operator++(int) {
// snapshot of the state before advancing
const_iterator tmp(*this);
operator++();
return tmp;
}

// Two iterators are equal when they are at the same position.
// Only the index is compared; the centroids held by each iterator are not.
template<typename T, typename A>
bool tdigest<T, A>::const_iterator::operator==(const const_iterator& other) const {
  const bool same_position = (other.index_ == index_);
  return same_position;
}

// Negation of operator==.
template<typename T, typename A>
bool tdigest<T, A>::const_iterator::operator!=(const const_iterator& other) const {
  return !(*this == other);
}

// Returns the current centroid as a (mean, weight) pair.
// No bounds check is performed: must not be called on a past-the-end iterator.
// NOTE(review): value_type stores the mean as const T&; if centroid::get_mean() returns
// by value, that reference would refer to a temporary -- confirm against the centroid class.
template<typename T, typename A>
auto tdigest<T, A>::const_iterator::operator*() const -> reference {
  const auto& current = centroids_[index_];
  return value_type(current.get_mean(), current.get_weight());
}

// Member access to the current (mean, weight) pair.
// The pair produced by operator* is converted to the `pointer` holder type on return.
template<typename T, typename A>
auto tdigest<T, A>::const_iterator::operator->() const -> pointer {
  return operator*();
}

} /* namespace datasketches */

#endif // _TDIGEST_IMPL_HPP_
17 changes: 17 additions & 0 deletions tdigest/test/tdigest_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,4 +453,21 @@ TEST_CASE("deserialize from reference implementation bytes float", "[tdigest]")
REQUIRE(td.get_rank(n) == 1);
}

// Iterating a sketch must visit every centroid, including those that come from
// values still sitting in the buffer, and the visited weights must add up to
// the total weight of the sketch.
TEST_CASE("iterate centroids", "[tdigest]") {
  tdigest_double td(100);
  for (int value = 0; value < 10; ++value) {
    td.update(value);
  }

  int num_centroids = 0;
  uint64_t weight_sum = 0;
  for (const auto& entry : td) {
    // entry.second is the weight of the centroid
    weight_sum += entry.second;
    ++num_centroids;
  }

  // Ensure that centroids are retrieved for a case where there are buffered values
  REQUIRE(num_centroids == 10);
  REQUIRE(td.get_total_weight() == weight_sum);
}

} /* namespace datasketches */