Conversation
| } | ||
|
|
||
| [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t size() const noexcept { return storage_.count(); } | ||
| [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t series_count() const noexcept { |
There was a problem hiding this comment.
| [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t series_count() const noexcept { | |
| [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t items_count() const noexcept { |
or
| [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t series_count() const noexcept { | |
| [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t count() const noexcept { |
| } | ||
| } | ||
| // Returns exclusive upper bound for ids that may be requested from the table. | ||
| [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t max_item_index() const noexcept { |
There was a problem hiding this comment.
The only implementation of max_item_index_impl in the code is:
[[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t max_item_index_impl() const noexcept { return next_item_index_impl(); }
Maybe this method should be removed?
| } | ||
|
|
||
| uint32_t max_ls_id = 0; | ||
| for (auto ls_id : ls_id_set_) { |
There was a problem hiding this comment.
Accept max_ls_id as an input parameter.
| const auto from_find = lss_.find(ls3); | ||
|
|
||
| // Assert | ||
| EXPECT_GE(new_id, shrink_boundary); |
There was a problem hiding this comment.
Instead of EXPECT/ASSERT_GE/LT, use EXPECT/ASSERT with a concrete expected value.
| Lss loaded; | ||
| load_lss_with_single_series(ls, loaded, target_id); | ||
| const bool was_active_before = target_id < loaded.added_series().size() && loaded.added_series()[target_id]; | ||
| EXPECT_FALSE(was_active_before); |
There was a problem hiding this comment.
EXPECT in the arrange section.
Also, what is this test actually testing?
| EXPECT_EQ(2U, lss_copy.series_count()); | ||
| } | ||
|
|
||
| TEST_F(BimapCopierFixture, CopyFindsCopiedSeries) { |
There was a problem hiding this comment.
CopyFindsCopiedSeries and CopyKeepsSeriesCount can be merged into a single test.
| copier2.copy_added_series_and_build_indexes(); | ||
|
|
||
| // Act | ||
| [[maybe_unused]] const auto ls_id = lss_copy_of_copy.find_or_emplace(label_set3); |
There was a problem hiding this comment.
| [[maybe_unused]] const auto ls_id = lss_copy_of_copy.find_or_emplace(label_set3); | |
| std::ignore = lss_copy_of_copy.find_or_emplace(label_set3); |
| EXPECT_EQ(ls3_, lss_[2]); | ||
| } | ||
|
|
||
| TEST_F(BimapShrinkFixture, ShrunkStateSeriesCountMatchesStorage) { |
There was a problem hiding this comment.
This test can be merged with FinalizeShrinkMapsSeriesInOrder
| EXPECT_EQ(3U, lss_.max_item_index()); | ||
| } | ||
|
|
||
| TEST_F(BimapShrinkFixture, IndexWriteContextDedupesSymbolsAfterFullShrink) { |
There was a problem hiding this comment.
IndexWriteContextDedupesSymbolsAfterFullShrink and IndexWriteContextResolvesRefsAfterFullShrink are tests for the index writer.
|
|
||
| struct ExportSymbolIdHasher { | ||
| [[nodiscard]] size_t operator()(const ExportSymbolId& id) const noexcept { | ||
| const uint64_t composite = (static_cast<uint64_t>(id.source) << 62U) ^ (static_cast<uint64_t>(id.name_id) << 31U) ^ static_cast<uint64_t>(id.value_id); |
| public: | ||
| #pragma pack(push, 1) | ||
| struct ExportSymbolId { | ||
| SymbolSource source{SymbolSource::kCurrent}; |
There was a problem hiding this comment.
Most likely, name_id will not be a big value, so you can use the highest bit for source.
|
|
||
| template <class Callback> | ||
| void for_each_symbol(Callback&& callback) const { | ||
| for (uint32_t symbol_ref = 0; symbol_ref < symbols_.size(); ++symbol_ref) { |
There was a problem hiding this comment.
I think that a loop with a pointer to the symbols_ items, or a loop over iterators, would be more efficient.
|
|
||
| void collect_current_symbols_from_shrunk_series(std::vector<SymbolIdWithView>& symbol_ids) const { | ||
| for (uint32_t ls_id = lss_.shrink_state().shift; ls_id < lss_.max_item_index(); ++ls_id) { | ||
| if (lss_.symbol_source_for_series(ls_id) != SymbolSource::kCurrent) { |
There was a problem hiding this comment.
Any item added after the shrink will have the type SymbolSource::kCurrent, so this if is redundant.
|
|
||
| using SymbolReference = PromPP::Prometheus::tsdb::index::SymbolReference; | ||
| using SymbolReferencesMap = phmap::flat_hash_map<ExportSymbolId, SymbolReference, ExportSymbolIdHasher>; | ||
| using SymbolIdWithView = std::pair<std::string_view, ExportSymbolId>; |
There was a problem hiding this comment.
std::string_view adds 16 bytes of overhead per symbol. That may be expensive and should be benchmarked.
| symbols_.clear(); | ||
| symbol_refs_.clear(); | ||
|
|
||
| std::vector<SymbolIdWithView> symbol_ids; |
There was a problem hiding this comment.
symbol_ids and the methods collect_empty_symbol, collect_current_symbols, and collect_snapshot_symbols can be extracted into a separate SymbolIdsCollector class.
| QueryableEncodingBimap lss_; | ||
| SymbolReferencesMap symbol_references_; | ||
| LabelIndicesWriter<QueryableEncodingBimap, decltype(stream_)> label_indices_writer{lss_, symbol_references_, stream_writer_}; | ||
| std::optional<series_index::prometheus::tsdb::index::IndexWriteContext<QueryableEncodingBimap>> index_write_context_; |
There was a problem hiding this comment.
std::optional is not a good pattern for unit tests. Instead of std::optional, create the variables in the place where they are used: TEST_P(LabelIndicesWriterFixture, Test).
Alternatively, you can manually call index_write_context_.rebuild() after filling the lss.
| @@ -49,16 +54,18 @@ class LabelIndicesWriterFixture : public testing::TestWithParam<LabelIndicesWrit | |||
|
|
|||
| std::ostringstream stream; | |||
| StreamWriter<decltype(stream_)> stream_writer{&stream}; | |||
| QueryableEncodingBimap lss_; | ||
| SymbolReferencesMap symbol_references_; | ||
| SeriesReferencesMap series_references_; | ||
| std::optional<series_index::prometheus::tsdb::index::IndexWriteContext<QueryableEncodingBimap>> index_write_context_; |
There was a problem hiding this comment.
use index_write_context_.rebuild instead of std::optional
| QueryableEncodingBimap lss_; | ||
| SymbolReferencesMap symbol_references_; | ||
| SeriesReferencesMap series_references_; | ||
| std::optional<series_index::prometheus::tsdb::index::IndexWriteContext<QueryableEncodingBimap>> index_write_context_; |
There was a problem hiding this comment.
use index_write_context_.rebuild instead of std::optional
| shrunk_lss_.finalize_copy_and_shrink(*snapshot_copy_, dst_src_ids_mapping); | ||
| } | ||
|
|
||
| static std::string write_index(const Lss& lss) { |
There was a problem hiding this comment.
You removed the IndexWriter::write method but implemented it here. It would be better to restore IndexWriter::write and use it with std::stringstream.
| } | ||
| } | ||
|
|
||
| std::shared_ptr<Lss> load_lss_from_file() { |
There was a problem hiding this comment.
Why do we need std::unique_ptr?
| return {}; | ||
| } | ||
|
|
||
| void assert_added_series_suffix_marked(const Lss& lss, uint32_t begin_id) { |
There was a problem hiding this comment.
This is a benchmark, not a unit test. We should not validate data here.
| decoder_.process_segment(processor); | ||
| decoder_.process_segment([this, &processor](Primitives::LabelSetID ls_id, Primitives::Timestamp timestamp, double value) PROMPP_LAMBDA_INLINE { | ||
| if constexpr (requires(LSS& label_set, uint32_t id) { label_set.mark_active(id); }) { | ||
| label_set_.mark_active(ls_id); |
There was a problem hiding this comment.
Why should we do this in the decoder?
| decoder_.process_segment([&last_ls_id, &samples, &container](uint32_t ls_id, int64_t ts, double v) PROMPP_LAMBDA_INLINE { | ||
| decoder_.process_segment([this, &last_ls_id, &samples, &container](uint32_t ls_id, int64_t ts, double v) PROMPP_LAMBDA_INLINE { | ||
| if constexpr (requires(LSS& label_set, uint32_t id) { label_set.mark_active(id); }) { | ||
| label_set_.mark_active(ls_id); |
There was a problem hiding this comment.
Why should we do this in GenericDecoder?
| inline __attribute__((always_inline)) const checkpoint_type& label_sets_checkpoint() const noexcept { return label_sets_checkpoint_; } | ||
|
|
||
| // Exclusive upper bound for label set ids written to WAL. | ||
| inline __attribute__((always_inline)) uint32_t max_item_index() const noexcept { return label_sets_checkpoint_.next_item_index(); } |
There was a problem hiding this comment.
| inline __attribute__((always_inline)) uint32_t max_item_index() const noexcept { return label_sets_checkpoint_.next_item_index(); } | |
| inline __attribute__((always_inline)) uint32_t max_written_item_index() const noexcept { return label_sets_checkpoint_.next_item_index(); } |
| @@ -0,0 +1,120 @@ | |||
| #include <benchmark/benchmark.h> | |||
There was a problem hiding this comment.
All comments also apply to the queryable_encoding_bimap_resolve_benchmark.cpp
| symbols_ids_sequences_type new_symbols_ids_sequences; | ||
| // reserve 3 extra bytes to avoid problems with streamvbyte | ||
| new_symbols_ids_sequences.reserve(symbols_ids_sequences_.size() - drop_seq_offset + 3); | ||
| std::ranges::copy(symbols_ids_sequences_.begin() + drop_seq_offset, symbols_ids_sequences_.end(), std::back_inserter(new_symbols_ids_sequences)); |
There was a problem hiding this comment.
I think resize + std::memcpy will be faster
| symbols_ids_sequences_.shrink_to_fit(); | ||
| Vector<item_type> new_items; | ||
| new_items.reserve(items_.size() - drop_count); | ||
| std::ranges::copy(items_.begin() + drop_count, items_.end(), std::back_inserter(new_items)); |
There was a problem hiding this comment.
I think resize + std::memcpy will be faster
| class ShrinkAwareSnapshotLSS : public SnapshotLSS { | ||
| public: | ||
| explicit ShrinkAwareSnapshotLSS(const QueryableEncodingBimap& lss) | ||
| : SnapshotLSS(lss), shrink_state_(lss.shrink_state().clone_for_snapshot()), added_series_(lss.added_series()) {} |
There was a problem hiding this comment.
Inside ShrinkState we copy a BareBones::Vector<uint32_t>. Use BareBones::SharedVector instead.
| return resolve_shrunk_series(id); | ||
| } | ||
|
|
||
| if (shrink_state_.is_fixed() && is_hidden_in_fixed_state(id)) { |
There was a problem hiding this comment.
is_normal and is_fixed should have similar behavior
| } | ||
| } | ||
|
|
||
| extern "C" void prompp_head_wal_encoder_max_item_index(void* args, void* res) { |
There was a problem hiding this comment.
| extern "C" void prompp_head_wal_encoder_max_item_index(void* args, void* res) { | |
| extern "C" void prompp_head_wal_encoder_max_written_item_index(void* args, void* res) { |
| } | ||
|
|
||
| // makeRotatedLSS creates a rotated LSS from the original LSS. | ||
| func (s *RotateLSSSuite) makeRotatedLSS(lName string) *rotatedLSS { |
There was a problem hiding this comment.
Simplify this method: fill the fields of rotatedLSS manually, not with generated code.
There was a problem hiding this comment.
But filling in the structure is what a constructor does.
There was a problem hiding this comment.
Yes, but don't use loops, slice.Sort, if i%2 == 0, or other logic blocks — only manual filling.
Signed-off-by: Alexandr Yudin <57181751+u-veles-a@users.noreply.github.com>
Signed-off-by: Alexandr Yudin <57181751+u-veles-a@users.noreply.github.com>
No description provided.