properly handle duplicate page ranges
parent d1548a467b
commit dcf6e13fa5

1 changed file with 21 additions and 2 deletions
@@ -87,6 +87,7 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
     fprintf(stderr, "Loading pages...\n");
 
     u64 n_pages = 0;
+    u64 n_duplicate_page_ranges = 0;
     u64 n_elems = 0;
     u64 tot_page_comp_size = 0;
     Page_Info_Node *pinfo_head = nullptr, *pinfo_tail = nullptr;
@@ -124,6 +125,8 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
             pinfo->is_first_in_cluster = true;
         }
 
+        b8 duplicate = false;
+
         if (UNLIKELY(!pinfo_head)) {
             // first node inserted
             assert(!pinfo_tail);
@@ -146,6 +149,11 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
             b8 pinfo_is_after_last = pinfo->range.start >= last_inserted_pinfo->range.end();
             if (pinfo_is_after_last) {
                 for (Page_Info_Node *node = last_inserted_pinfo->next; node; node = node->next) {
+                    // sanity check for duplicate pages
+                    if (pinfo->range.start == node->range.start) {
+                        duplicate = true;
+                        break;
+                    }
                     // check if `pinfo` fits right before the node we're looking at
                     if (pinfo->range.end() <= node->range.start) {
                         Page_Info_Node *prev = node->prev;
@@ -163,6 +171,11 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
                 }
             } else {
                 for (Page_Info_Node *node = last_inserted_pinfo; node; node = node->prev) {
+                    // sanity check for duplicate pages
+                    if (pinfo->range.start == node->range.start) {
+                        duplicate = true;
+                        break;
+                    }
                     // check if `pinfo` fits right before the node we're looking at
                     if (pinfo->range.end() <= node->range.start) {
                         Page_Info_Node *prev = node->prev;
@@ -180,7 +193,12 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
                     }
                 }
 
-            assert(inserted);
+            assert(inserted != duplicate);
+        }
+
+        if (duplicate) {
+            ++n_duplicate_page_ranges;
+            continue;
         }
 
         last_inserted_pinfo = pinfo;
@@ -195,7 +213,8 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
     chr::time_point end_t = chr::high_resolution_clock::now();
     u64 time_spent_ms = chr::duration_cast<chr::milliseconds>(end_t - start_t).count();
 
-    fprintf(stderr, "Loaded %lu pages in %lu ms.\nGenerating groups...\n", n_pages, time_spent_ms);
+    fprintf(stderr, "Loaded %lu pages in %lu ms (%lu duplicates).\nGenerating groups...\n",
+            n_pages, time_spent_ms, n_duplicate_page_ranges);
 
     // Create page groups and chunks.
     // Each page group is a grouping of GROUP_SIZE page infos whose range is equal to the combined ranges
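
For context, a condensed, self-contained sketch of the insertion scheme these hunks modify. It is an illustration only: the struct layout, the helper names, and the simple head-to-tail scan are assumptions made for brevity (the real code allocates nodes from an Arena and scans forward or backward from the last inserted node), but it shows the behaviour the commit introduces: a page range whose start offset already appears in the list is counted as a duplicate and skipped instead of tripping the insertion assert.

// Illustrative sketch only -- simplified types, not the project's real ones.
#include <cassert>
#include <cstdint>
#include <cstdio>

using u64 = std::uint64_t;
using b8  = bool;   // assumption: b8 is a boolean-like 8-bit type

struct Page_Range {
    u64 start, len;
    u64 end() const { return start + len; }   // one past the last byte
};

struct Page_Info_Node {
    Page_Range range;
    Page_Info_Node *prev = nullptr, *next = nullptr;
};

struct Page_List {
    Page_Info_Node *head = nullptr, *tail = nullptr;
    u64 n_pages = 0;
    u64 n_duplicate_page_ranges = 0;

    // Insert `pinfo` keeping the list sorted by range.start.
    // A range that starts at the same offset as an existing node is treated
    // as a duplicate: it is counted and left out of the list.
    b8 insert(Page_Info_Node *pinfo) {
        b8 inserted = false, duplicate = false;

        if (!head) {
            head = tail = pinfo;            // first node inserted
            inserted = true;
        } else {
            Page_Info_Node *node = head;
            for (; node; node = node->next) {
                if (pinfo->range.start == node->range.start) {   // sanity check for duplicate pages
                    duplicate = true;
                    break;
                }
                if (pinfo->range.end() <= node->range.start)     // fits right before `node`
                    break;
            }
            if (!duplicate) {
                // Link `pinfo` before `node`; if the scan ran off the end, append at the tail.
                pinfo->next = node;
                pinfo->prev = node ? node->prev : tail;
                if (pinfo->prev) pinfo->prev->next = pinfo; else head = pinfo;
                if (node) node->prev = pinfo; else tail = pinfo;
                inserted = true;
            }
        }

        assert(inserted != duplicate);      // exactly one of the two outcomes per page
        if (duplicate) {
            ++n_duplicate_page_ranges;
            return false;
        }
        ++n_pages;
        return true;
    }
};

int main() {
    Page_List list;
    Page_Info_Node a{{100, 50}}, b{{200, 30}}, dup{{100, 50}};
    list.insert(&a);
    list.insert(&b);
    list.insert(&dup);   // same start offset as `a`: counted, not linked in
    fprintf(stderr, "Loaded %llu pages (%llu duplicates).\n",
            (unsigned long long)list.n_pages,
            (unsigned long long)list.n_duplicate_page_ranges);
    return 0;
}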