fix bug in RMicroFileReader

This commit is contained in:
silverweed 2024-07-12 18:29:35 +02:00
parent 58ef95409d
commit 36a34cedd3
8 changed files with 123 additions and 29 deletions

View file

@ -32,6 +32,9 @@ $(ROOT_IFACE): src/root/RMicroFileReader.cxx
d: $(ROOT_IFACE_DBG) build/imgui.o d: $(ROOT_IFACE_DBG) build/imgui.o
$(MOLD) $(CXX) -DDEBUG -g -O0 -DENABLE_ASAN $(CFLAGS) $(INC) $(ROOTFLAGS) -o rntviewer src/rntviewer.cpp build/imgui.o -lasan $(ROOT_IFACE_DBG) $(LIBS) $(ROOTLIBS) $(MOLD) $(CXX) -DDEBUG -g -O0 -DENABLE_ASAN $(CFLAGS) $(INC) $(ROOTFLAGS) -o rntviewer src/rntviewer.cpp build/imgui.o -lasan $(ROOT_IFACE_DBG) $(LIBS) $(ROOTLIBS)
asan: $(ROOT_IFACE_DBG) build/imgui.o
$(MOLD) $(CXX) -DDEBUG -g -O0 $(CFLAGS) -fsanitize=address $(INC) $(ROOTFLAGS) -o rntviewer src/rntviewer.cpp build/imgui.o $(ROOT_IFACE_DBG) $(LIBS) $(ROOTLIBS)
noasan: $(ROOT_IFACE_DBG) build/imgui.o noasan: $(ROOT_IFACE_DBG) build/imgui.o
$(MOLD) $(CXX) -DDEBUG -g -O0 $(CFLAGS) -fsanitize=undefined $(INC) $(ROOTFLAGS) -o rntviewer src/rntviewer.cpp build/imgui.o $(ROOT_IFACE_DBG) $(LIBS) $(ROOTLIBS) $(MOLD) $(CXX) -DDEBUG -g -O0 $(CFLAGS) -fsanitize=undefined $(INC) $(ROOTFLAGS) -o rntviewer src/rntviewer.cpp build/imgui.o $(ROOT_IFACE_DBG) $(LIBS) $(ROOTLIBS)

View file

@ -120,24 +120,7 @@ void *arena_push_impl(Arena *arena, u64 size)
template <typename T> template <typename T>
internal internal
T *arena_push(Arena *arena) T *arena_push_array_nozero(Arena *arena, u64 count)
{
void *mem = arena_push_impl(arena, sizeof(T));
return (T *)mem;
}
template <typename T>
internal
T *arena_push_zeroed(Arena *arena)
{
T *mem = arena_push<T>(arena);
memset(mem, 0, sizeof(T));
return mem;
}
template <typename T>
internal
T *arena_push_array_no_zero(Arena *arena, u64 count)
{ {
void *mem = arena_push_impl(arena, sizeof(T) * count); void *mem = arena_push_impl(arena, sizeof(T) * count);
return (T *)mem; return (T *)mem;
@ -147,11 +130,25 @@ template <typename T>
internal internal
T *arena_push_array(Arena *arena, u64 count) T *arena_push_array(Arena *arena, u64 count)
{ {
T *ary = arena_push_array_no_zero<T>(arena, count); T *ary = arena_push_array_nozero<T>(arena, count);
memset(ary, 0, sizeof(T) * count); memset(ary, 0, sizeof(T) * count);
return ary; return ary;
} }
// Allocates room for exactly one T from `arena` and returns a pointer to it.
// The storage comes from arena_push_array, so it is zero-filled.
template <typename T>
internal
T *arena_push(Arena *arena)
{
  const u64 single_item = 1;
  T *item = arena_push_array<T>(arena, single_item);
  return item;
}
// Allocates room for exactly one T from `arena` without clearing it:
// the returned memory is whatever arena_push_array_nozero hands back.
template <typename T>
internal
T *arena_push_nozero(Arena *arena)
{
  const u64 single_item = 1;
  T *item = arena_push_array_nozero<T>(arena, single_item);
  return item;
}
internal internal
u8 *arena_push_contiguous(Arena *arena, u64 size) u8 *arena_push_contiguous(Arena *arena, u64 size)
{ {

View file

@ -47,6 +47,17 @@ u32 mem_edit_bg_color_fn(const u8 *, u64 off, const void *user_data)
if (rdata.rng_anchor.start <= off && off <= rdata.rng_anchor.end()) return COL(app->vsettings.col_anchor); if (rdata.rng_anchor.start <= off && off <= rdata.rng_anchor.end()) return COL(app->vsettings.col_anchor);
if (rdata.rng_header.start <= off && off <= rdata.rng_header.end()) return COL(app->vsettings.col_header); if (rdata.rng_header.start <= off && off <= rdata.rng_header.end()) return COL(app->vsettings.col_header);
if (rdata.rng_footer.start <= off && off <= rdata.rng_footer.end()) return COL(app->vsettings.col_footer); if (rdata.rng_footer.start <= off && off <= rdata.rng_footer.end()) return COL(app->vsettings.col_footer);
// @Speed!
for (u64 group_idx = 0; group_idx < rdata.n_page_groups; ++group_idx) {
const Page_Info_Group &group = rdata.page_groups[group_idx];
if (off < group.range.start || off > group.range.end())
continue;
for (Page_Info_Node *pinfo = group.first; pinfo; pinfo = pinfo->next) {
if (pinfo->range.start == off) return COL(app->vsettings.col_page_start);
if (pinfo->range.start < off && off <= pinfo->range.end()) return COL(app->vsettings.col_page);
}
}
#undef COL #undef COL
return IM_COL32(0, 0, 0, 0); return IM_COL32(0, 0, 0, 0);
@ -74,6 +85,8 @@ Viewer_Settings make_viewer_settings()
COL(col_footer, 50, 0, 150); COL(col_footer, 50, 0, 150);
COL(col_key, 0, 100, 50); COL(col_key, 0, 100, 50);
COL(col_tfile, 90, 90, 90); COL(col_tfile, 90, 90, 90);
COL(col_page, 125, 0, 125);
COL(col_page_start, 75, 0, 75);
#undef COL #undef COL
return settings; return settings;
} }
@ -91,7 +104,7 @@ void update_and_render(Arena *arena, App_State &app, f32 delta_time_ms)
const u64 inspected_max_size = 2048; const u64 inspected_max_size = 2048;
u64 text_buf_size = min(app.inspected_file_size * 2, inspected_max_size); u64 text_buf_size = min(app.inspected_file_size * 2, inspected_max_size);
char *text_buf = arena_push_array_no_zero<char>(scratch.arena, text_buf_size + 1); char *text_buf = arena_push_array_nozero<char>(scratch.arena, text_buf_size + 1);
// Convert file content to human readable // Convert file content to human readable
// @Speed: maybe do this only when the file changes // @Speed: maybe do this only when the file changes
for (u64 i = 0; i < text_buf_size / 2; ++i) for (u64 i = 0; i < text_buf_size / 2; ++i)
@ -160,6 +173,10 @@ void update_and_render(Arena *arena, App_State &app, f32 delta_time_ms)
ImGui::SameLine(); ImGui::SameLine();
if (ImGui::Button("TKey Header")) {} // TODO app.vsettings.base_display_addr = app.rndatarng_footer.start; if (ImGui::Button("TKey Header")) {} // TODO app.vsettings.base_display_addr = app.rndatarng_footer.start;
ImGui::Separator();
ImGui::Text("Num pages: %lu", app.rndata.n_pages);
ImGui::Text("Num elements: %lu", app.rndata.n_elems);
ImGui::EndTable(); ImGui::EndTable();
} }

View file

@ -4,6 +4,8 @@ struct Viewer_Settings {
float col_footer[3]; float col_footer[3];
float col_key[3]; float col_key[3];
float col_tfile[3]; float col_tfile[3];
float col_page[3];
float col_page_start[3];
u64 base_display_addr; u64 base_display_addr;
}; };

View file

@ -63,26 +63,80 @@ ROOT::Experimental::RNTupleDescriptor create_descriptor(Arena *arena, RMicroFile
} }
internal internal
void gather_metadata(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info) void gather_metadata(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info, RNTuple_Data &rndata)
{ {
using namespace ROOT::Experimental; using namespace ROOT::Experimental;
using namespace ROOT::Experimental::Internal; using namespace ROOT::Experimental::Internal;
RNTupleDescriptor descriptor = create_descriptor(arena, reader, info); RNTupleDescriptor descriptor = create_descriptor(arena, reader, info);
u64 n_pages = 0;
u64 n_elems = 0;
Page_Info_Node *pinfo_head = nullptr, *pinfo_tail = nullptr;
// for all clusters ... // for all clusters ...
DescriptorId_t cluster_id = descriptor.FindClusterId(0, 0); for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) {
while (cluster_id != kInvalidDescriptorId) {
const RClusterDescriptor &cluster_desc = descriptor.GetClusterDescriptor(cluster_id);
for (const auto &[col_id, col_range] : cluster_desc.GetColumnRangeIterable()) { for (const auto &[col_id, col_range] : cluster_desc.GetColumnRangeIterable()) {
// TODO gather column metadata // TODO gather column metadata
// TODO gather page metadata // TODO gather page metadata
// fprintf(stderr, "col_id: %d\n", col_id);
// TODO!! insert page_info sorted by byte range!
const auto &page_range = cluster_desc.GetPageRange(col_id); const auto &page_range = cluster_desc.GetPageRange(col_id);
for (const auto &page_info : page_range.fPageInfos) { for (const auto &page_info : page_range.fPageInfos) {
Page_Info_Node *pinfo = arena_push<Page_Info_Node>(arena);
pinfo->range.start = page_info.fLocator.GetPosition<u64>();
pinfo->range.len = page_info.fLocator.fBytesOnStorage;
pinfo->n_elems = page_info.fNElements;
if (pinfo_head) {
assert(pinfo_tail);
if (pinfo->range.start < pinfo_head->range.start) {
pinfo->next = pinfo_head;
pinfo_head = pinfo;
} else {
assert(pinfo->range.start > pinfo_tail->range.end());
pinfo_tail->next = pinfo;
pinfo_tail = pinfo;
}
} else {
assert(!pinfo_tail);
pinfo_head = pinfo_tail = pinfo;
}
++n_pages;
n_elems += page_info.fNElements;
} }
} }
} }
fprintf(stderr, "Loaded %lu pages\n", n_pages);
// Create page groups: every group covers up to GROUP_SIZE consecutive nodes of
// the page list, so that lookups by byte offset can skip whole groups at once.
// NOTE(review): the group ranges are only meaningful if the list is ordered by
// range.start -- confirm that the insertion code above guarantees this.
const u64 GROUP_SIZE = 1000;
// Not zeroed on allocation: every group handed out below gets all of its fields assigned.
Page_Info_Group *groups = arena_push_array_nozero<Page_Info_Group>(arena, n_pages / GROUP_SIZE + 1);
Page_Info_Group *cur_group = nullptr;
// Most recently visited node, i.e. the last node of the group being filled.
Page_Info_Node *last = nullptr;
u64 idx = 0;
u64 n_groups = 0;
for (Page_Info_Node *pinfo = pinfo_head; pinfo; pinfo = pinfo->next) {
  if (idx++ % GROUP_SIZE == 0) {
    if (cur_group) {
      // Close the previous group: its length is a byte distance (up to the end of
      // its last page), not a difference between node pointers.
      cur_group->range.len = last->range.end() - cur_group->range.start;
      // Print only once the length is known, so we never read uninitialized memory.
      printf("group %lu -> %lu\n", cur_group->range.start, cur_group->range.end());
    }
    cur_group = &groups[n_groups++];
    cur_group->first = pinfo;
    cur_group->range.start = pinfo->range.start;
    cur_group->range.len = 0;
  }
  last = pinfo;
}
// Close the trailing group (there is none when the page list is empty).
if (cur_group) {
  cur_group->range.len = last->range.end() - cur_group->range.start;
  printf("group %lu -> %lu\n", cur_group->range.start, cur_group->range.end());
}
rndata.pages = pinfo_head;
rndata.page_groups = groups;
rndata.n_page_groups = n_groups;
rndata.n_pages = n_pages;
rndata.n_elems = n_elems;
} }
internal internal
@ -109,7 +163,7 @@ RNTuple_Data get_rntuple_data(Arena *arena, const char *fname, const char *ntpl_
rndata.rblob_header_size = file_info.rblob_key_header_nbytes; rndata.rblob_header_size = file_info.rblob_key_header_nbytes;
rndata.root_file_header_size = file_info.tfile_header_nbytes; rndata.root_file_header_size = file_info.tfile_header_nbytes;
gather_metadata(arena, file_reader, file_info); gather_metadata(arena, file_reader, file_info, rndata);
} }
return rndata; return rndata;

View file

@ -5,6 +5,18 @@ struct Byte_Range {
u64 end() const { return start + len; } u64 end() const { return start + len; }
}; };
// Describes one page found while gathering metadata; nodes are chained into a
// singly-linked list.
struct Page_Info_Node {
// Next node in the list, or null at the end of the list.
Page_Info_Node *next;
// Location of the page in the file: start is the page locator's position,
// len is its number of bytes on storage.
Byte_Range range;
// Number of elements stored in this page.
u64 n_elems;
};
// A batch of consecutive Page_Info_Nodes. Callers compare a byte offset with
// `range` to skip groups that cannot contain it before walking the nodes.
struct Page_Info_Group {
// Byte range attributed to the group; range.start is the start of `first`'s range.
Byte_Range range;
// First node of the group. Following `next` from here does not stop at the
// group boundary: it continues through the rest of the page list.
Page_Info_Node *first;
};
struct RNTuple_Data { struct RNTuple_Data {
struct { struct {
u16 epoch, major, minor, patch; u16 epoch, major, minor, patch;
@ -16,4 +28,12 @@ struct RNTuple_Data {
Byte_Range rng_anchor_key; Byte_Range rng_anchor_key;
Byte_Range rng_header; Byte_Range rng_header;
Byte_Range rng_footer; Byte_Range rng_footer;
Page_Info_Node *pages;
u64 n_pages;
// total number of elements of all pages
u64 n_elems;
Page_Info_Group *page_groups;
u64 n_page_groups;
}; };

View file

@ -1070,7 +1070,7 @@ static size_t ComputeNumChunks(size_t nbytes, size_t maxChunkSize)
struct RMicroFileReader::Impl { struct RMicroFileReader::Impl {
std::unique_ptr<ROOT::Internal::RRawFile> fRawFile; std::unique_ptr<ROOT::Internal::RRawFile> fRawFile;
std::uint64_t fMaxBlobSize; std::uint64_t fMaxBlobSize = 0;
}; };
RMicroFileReader::RMicroFileReader(const char *fname) { RMicroFileReader::RMicroFileReader(const char *fname) {
@ -1221,6 +1221,7 @@ void RMicroFileReader::ReadBuffer(void *buffer, size_t nbytes, std::uint64_t off
{ {
size_t nread; size_t nread;
uint64_t maxBlobSize = impl->fMaxBlobSize; uint64_t maxBlobSize = impl->fMaxBlobSize;
assert(maxBlobSize != -1);
if (maxBlobSize == 0 || nbytes <= maxBlobSize) { if (maxBlobSize == 0 || nbytes <= maxBlobSize) {
// Fast path: read single blob // Fast path: read single blob
nread = impl->fRawFile->ReadAt(buffer, nbytes, offset); nread = impl->fRawFile->ReadAt(buffer, nbytes, offset);

View file

@ -5,7 +5,7 @@ String8 push_str8fv(Arena *arena, const char *fmt, va_list args)
va_copy(args2, args); va_copy(args2, args);
u32 needed_bytes = vsnprintf(0, 0, fmt, args) + 1; u32 needed_bytes = vsnprintf(0, 0, fmt, args) + 1;
String8 result = {}; String8 result = {};
result.str = arena_push_array_no_zero<u8>(arena, needed_bytes); result.str = arena_push_array_nozero<u8>(arena, needed_bytes);
result.size = vsnprintf((char*)result.str, needed_bytes, fmt, args2); result.size = vsnprintf((char*)result.str, needed_bytes, fmt, args2);
result.str[result.size] = 0; result.str[result.size] = 0;
va_end(args2); va_end(args2);