properly handle duplicate page ranges
parent d1548a467b
commit dcf6e13fa5
1 changed file with 21 additions and 2 deletions
@@ -87,6 +87,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
     fprintf(stderr, "Loading pages...\n");
 
     u64 n_pages = 0;
+    u64 n_duplicate_page_ranges = 0;
     u64 n_elems = 0;
     u64 tot_page_comp_size = 0;
     Page_Info_Node *pinfo_head = nullptr, *pinfo_tail = nullptr;
@@ -124,6 +125,8 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
             pinfo->is_first_in_cluster = true;
         }
 
+        b8 duplicate = false;
+
         if (UNLIKELY(!pinfo_head)) {
             // first node inserted
             assert(!pinfo_tail);
@@ -146,6 +149,11 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
             b8 pinfo_is_after_last = pinfo->range.start >= last_inserted_pinfo->range.end();
             if (pinfo_is_after_last) {
                 for (Page_Info_Node *node = last_inserted_pinfo->next; node; node = node->next) {
+                    // sanity check for duplicate pages
+                    if (pinfo->range.start == node->range.start) {
+                        duplicate = true;
+                        break;
+                    }
                     // check if `pinfo` fits right before the node we're looking at
                     if (pinfo->range.end() <= node->range.start) {
                         Page_Info_Node *prev = node->prev;
@@ -163,6 +171,11 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
                 }
             } else {
                 for (Page_Info_Node *node = last_inserted_pinfo; node; node = node->prev) {
+                    // sanity check for duplicate pages
+                    if (pinfo->range.start == node->range.start) {
+                        duplicate = true;
+                        break;
+                    }
                     // check if `pinfo` fits right before the node we're looking at
                     if (pinfo->range.end() <= node->range.start) {
                         Page_Info_Node *prev = node->prev;
@@ -180,7 +193,12 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
                 }
             }
 
-            assert(inserted);
+            assert(inserted != duplicate);
         }
 
+        if (duplicate) {
+            ++n_duplicate_page_ranges;
+            continue;
+        }
+
         last_inserted_pinfo = pinfo;
@@ -195,7 +213,8 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
     chr::time_point end_t = chr::high_resolution_clock::now();
    u64 time_spent_ms = chr::duration_cast<chr::milliseconds>(end_t - start_t).count();
 
-    fprintf(stderr, "Loaded %lu pages in %lu ms.\nGenerating groups...\n", n_pages, time_spent_ms);
+    fprintf(stderr, "Loaded %lu pages in %lu ms (%lu duplicates).\nGenerating groups...\n",
+            n_pages, time_spent_ms, n_duplicate_page_ranges);
 
     // Create page groups and chunks.
     // Each page group is a grouping of GROUP_SIZE page infos whose range is equal to the combined ranges
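
For context, below is a minimal, self-contained sketch of the insert-or-skip pattern this diff implements: sorted insertion into a doubly-linked list of page ranges, searching from the last insertion point and counting ranges whose start offset is already present instead of inserting them twice. The Range, Node, and Sorted_List names are hypothetical stand-ins for the repository's Page_Info_Node machinery, and the sketch assumes input ranges never partially overlap (only exact duplicates occur).

// Hypothetical, simplified sketch -- not the repository's actual types.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct Range {
    uint64_t start, len;
    uint64_t end() const { return start + len; }
};

struct Node {
    Range range;
    Node *prev = nullptr, *next = nullptr;
};

struct Sorted_List {
    Node *head = nullptr, *tail = nullptr;
    Node *last_inserted = nullptr;
    uint64_t n_duplicates = 0;

    // Splice `node` in front of `at`, updating head if needed.
    void link_before(Node *at, Node *node) {
        node->prev = at->prev;
        node->next = at;
        if (at->prev) at->prev->next = node; else head = node;
        at->prev = node;
    }

    // Keep the list ordered by range.start; count and skip ranges whose
    // start offset is already present instead of inserting them twice.
    void insert(Node *node) {
        if (!head) {
            head = tail = last_inserted = node;
            return;
        }

        bool inserted = false;
        bool duplicate = false;

        if (node->range.start >= last_inserted->range.end()) {
            // New range lies after the last insertion point: walk forward.
            for (Node *cur = last_inserted->next; cur; cur = cur->next) {
                if (node->range.start == cur->range.start) { duplicate = true; break; }
                if (node->range.end() <= cur->range.start) {
                    link_before(cur, node);
                    inserted = true;
                    break;
                }
            }
            if (!inserted && !duplicate) {
                // Ran past the tail: append.
                node->prev = tail;
                tail->next = node;
                tail = node;
                inserted = true;
            }
        } else {
            // New range lies before the last insertion point: walk backward.
            for (Node *cur = last_inserted; cur; cur = cur->prev) {
                if (node->range.start == cur->range.start) { duplicate = true; break; }
                if (node->range.end() <= cur->range.start &&
                    (!cur->prev || cur->prev->range.end() <= node->range.start)) {
                    link_before(cur, node);
                    inserted = true;
                    break;
                }
            }
        }

        // Exactly one outcome must hold, mirroring the diff's assert.
        assert(inserted != duplicate);

        if (duplicate) {
            ++n_duplicates;   // counted, not inserted
            return;
        }
        last_inserted = node;
    }
};

int main() {
    Sorted_List list;
    Node a{{0, 10}}, b{{10, 5}}, b_again{{10, 5}}, c{{20, 4}};
    list.insert(&a);
    list.insert(&b);
    list.insert(&b_again);   // same start as b: skipped and counted
    list.insert(&c);
    std::printf("%llu duplicate page ranges skipped\n",
                (unsigned long long)list.n_duplicates);   // prints 1
    return 0;
}

Keeping assert(inserted != duplicate) rather than dropping the assertion preserves the original sanity check: a page range that is neither insertable nor a recognized duplicate still trips the assert, so the invariant behind the old assert(inserted) is not silently weakened.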