massively speed up page loading
This commit is contained in:
parent
16832d2132
commit
b708ab246f
2 changed files with 47 additions and 12 deletions
|
@ -103,7 +103,6 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
||||||
|
|
||||||
for (const RClusterDescriptor::RColumnRange &col_range : cluster_desc.GetColumnRangeIterable()) {
|
for (const RClusterDescriptor::RColumnRange &col_range : cluster_desc.GetColumnRangeIterable()) {
|
||||||
// insert page infos sorted by byte range
|
// insert page infos sorted by byte range
|
||||||
// @Speed: this is slow! speed it up!
|
|
||||||
const auto &page_range = cluster_desc.GetPageRange(col_range.fPhysicalColumnId);
|
const auto &page_range = cluster_desc.GetPageRange(col_range.fPhysicalColumnId);
|
||||||
for (const auto &page_info : page_range.fPageInfos) {
|
for (const auto &page_info : page_range.fPageInfos) {
|
||||||
const u64 checksum_size = sizeof(u64);
|
const u64 checksum_size = sizeof(u64);
|
||||||
|
@ -120,22 +119,54 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
||||||
} else if (pinfo->range.start >= pinfo_tail->range.end()) {
|
} else if (pinfo->range.start >= pinfo_tail->range.end()) {
|
||||||
// after tail
|
// after tail
|
||||||
pinfo_tail->next = pinfo;
|
pinfo_tail->next = pinfo;
|
||||||
|
pinfo->prev = pinfo_tail;
|
||||||
pinfo_tail = pinfo;
|
pinfo_tail = pinfo;
|
||||||
} else if (pinfo->range.end() <= pinfo_head->range.start) {
|
} else if (pinfo->range.end() <= pinfo_head->range.start) {
|
||||||
// before head
|
// before head
|
||||||
pinfo->next = pinfo_head;
|
pinfo->next = pinfo_head;
|
||||||
|
pinfo_head->prev = pinfo;
|
||||||
pinfo_head = pinfo;
|
pinfo_head = pinfo;
|
||||||
} else if (last_inserted_pinfo && pinfo->range.start == last_inserted_pinfo->range.end()) {
|
} else {
|
||||||
// common case: insert after previous
|
// Very commonly pages are already sorted either in increasing or decreasing order.
|
||||||
pinfo->next = last_inserted_pinfo->next;
|
// By starting to look from the last inserted page we are very likely to find the
|
||||||
last_inserted_pinfo->next = pinfo;
|
// proper slot immediately.
|
||||||
} else for (Page_Info_Node *node = pinfo_head->next, *prev = pinfo_head; node; prev = node, node = node->next) {
|
b8 inserted = false;
|
||||||
if (pinfo->range.end() <= node->range.start) {
|
b8 pinfo_is_after_last = pinfo->range.start >= last_inserted_pinfo->range.end();
|
||||||
prev->next = pinfo;
|
if (pinfo_is_after_last) {
|
||||||
pinfo->next = node;
|
for (Page_Info_Node *node = last_inserted_pinfo->next; node; node = node->next) {
|
||||||
++n_slow;
|
// check if `pinfo` fits right before the node we're looking at
|
||||||
break;
|
if (pinfo->range.end() <= node->range.start) {
|
||||||
|
Page_Info_Node *prev = node->prev;
|
||||||
|
if (UNLIKELY(!prev) || prev->range.end() <= pinfo->range.start) {
|
||||||
|
if (LIKELY(prev)) {
|
||||||
|
prev->next = pinfo;
|
||||||
|
pinfo->prev = prev;
|
||||||
|
}
|
||||||
|
pinfo->next = node;
|
||||||
|
inserted = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (Page_Info_Node *node = last_inserted_pinfo; node; node = node->prev) {
|
||||||
|
// check if `pinfo` fits right before the node we're looking at
|
||||||
|
if (pinfo->range.end() <= node->range.start) {
|
||||||
|
Page_Info_Node *prev = node->prev;
|
||||||
|
if (UNLIKELY(!prev) || prev->range.end() <= pinfo->range.start) {
|
||||||
|
if (LIKELY(prev)) {
|
||||||
|
prev->next = pinfo;
|
||||||
|
pinfo->prev = prev;
|
||||||
|
}
|
||||||
|
pinfo->next = node;
|
||||||
|
inserted = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(inserted);
|
||||||
}
|
}
|
||||||
|
|
||||||
last_inserted_pinfo = pinfo;
|
last_inserted_pinfo = pinfo;
|
||||||
|
@ -171,8 +202,11 @@ void gather_ntuple_metadata(Arena *arena, RMicroFileReader &reader, const RNTupl
|
||||||
u64 n_chunks = 1;
|
u64 n_chunks = 1;
|
||||||
|
|
||||||
u64 idx = 1;
|
u64 idx = 1;
|
||||||
|
[[maybe_unused]] Page_Info_Node *prev = pinfo_head;
|
||||||
for (Page_Info_Node *pinfo = pinfo_head->next; pinfo; pinfo = pinfo->next) {
|
for (Page_Info_Node *pinfo = pinfo_head->next; pinfo; pinfo = pinfo->next) {
|
||||||
|
assert(prev->range.end() <= pinfo->range.start);
|
||||||
|
prev = pinfo;
|
||||||
|
|
||||||
if (pinfo->range.start != chunks_tail->range.end()) {
|
if (pinfo->range.start != chunks_tail->range.end()) {
|
||||||
// close current chunk and open new one
|
// close current chunk and open new one
|
||||||
Page_Info_Chunk *chunk = arena_push<Page_Info_Chunk>(arena);
|
Page_Info_Chunk *chunk = arena_push<Page_Info_Chunk>(arena);
|
||||||
|
|
|
@ -11,6 +11,7 @@ struct Range_Seq {
|
||||||
|
|
||||||
struct Page_Info_Node {
|
struct Page_Info_Node {
|
||||||
Page_Info_Node *next;
|
Page_Info_Node *next;
|
||||||
|
Page_Info_Node *prev;
|
||||||
|
|
||||||
Byte_Range range; // len includes checksum
|
Byte_Range range; // len includes checksum
|
||||||
i32 n_elems; // negative = page has checksum
|
i32 n_elems; // negative = page has checksum
|
||||||
|
|
Loading…
Reference in a new issue