// Formats the four version fields of `ntuple` as "version E.M.m.p".
// The string is produced by push_str8f using `arena`.
internal String8 rntuple_description(Arena *arena, const RNTuple_Data &ntuple)
{
  return push_str8f(arena, "version %u.%u.%u.%u",
                    ntuple.version.epoch,
                    ntuple.version.major,
                    ntuple.version.minor,
                    ntuple.version.patch);
}

// Builds an RNTupleDescriptor for the RNTuple described by `info.anchor`.
//
// Steps:
//   1. read the compressed header and footer through `reader`, at the offsets
//      and sizes given by the anchor;
//   2. decompress and deserialize both into a descriptor builder;
//   3. for every cluster group, read, decompress and deserialize its page list
//      into the descriptor.
//
// All byte buffers are taken from a temp region of `arena` opened with
// temp_begin and closed with temp_end (via `defer`).
internal ROOT::Experimental::RNTupleDescriptor
create_descriptor(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info)
{
  using namespace ROOT::Experimental;
  using namespace ROOT::Experimental::Internal;

  Temp scratch = temp_begin(arena);
  defer { temp_end(scratch); };

  const RNTuple_Anchor &anchor = info.anchor;

  // Read compressed header+footer
  u8 *header_zip = arena_push_contiguous(scratch.arena, anchor.fNBytesHeader);
  u8 *footer_zip = arena_push_contiguous(scratch.arena, anchor.fNBytesFooter);
  reader.ReadBuffer(header_zip, anchor.fNBytesHeader, anchor.fSeekHeader);
  reader.ReadBuffer(footer_zip, anchor.fNBytesFooter, anchor.fSeekFooter);

  // Decompress header+footer
  u8 *header = arena_push_contiguous(scratch.arena, anchor.fLenHeader);
  u8 *footer = arena_push_contiguous(scratch.arena, anchor.fLenFooter);
  RNTupleDecompressor::Unzip(header_zip, anchor.fNBytesHeader, anchor.fLenHeader, header);
  RNTupleDecompressor::Unzip(footer_zip, anchor.fNBytesFooter, anchor.fLenFooter, footer);

  // Deserialize header+footer
  RNTupleDescriptorBuilder desc_builder;
  RNTupleSerializer::DeserializeHeader(header, anchor.fLenHeader, desc_builder);
  RNTupleSerializer::DeserializeFooter(footer, anchor.fLenFooter, desc_builder);

  RNTupleDescriptor descriptor = desc_builder.MoveDescriptor();

  for (const RClusterGroupDescriptor &cgdesc : descriptor.GetClusterGroupIterable()) {
    // Remember the arena position so this iteration's buffers can be dropped
    // again before the next cluster group is processed.
    u64 arena_start = arena_pos(scratch.arena);

    // Read page list
    u64 page_list_zip_size = cgdesc.GetPageListLocator().fBytesOnStorage;
    u64 page_list_seek = cgdesc.GetPageListLocator().GetPosition();
    u8 *page_list_zip = arena_push_contiguous(scratch.arena, page_list_zip_size);
    reader.ReadBuffer(page_list_zip, page_list_zip_size, page_list_seek);

    // Decompress page list
    u64 page_list_len = cgdesc.GetPageListLength();
    u8 *page_list = arena_push_contiguous(scratch.arena, page_list_len);
    RNTupleDecompressor::Unzip(page_list_zip, page_list_zip_size, page_list_len, page_list);

    // Deserialize page list
    DescriptorId_t cluster_grpid = cgdesc.GetId();
    RNTupleSerializer::DeserializePageList(page_list, page_list_len, cluster_grpid, descriptor);

    arena_pop_to(scratch.arena, arena_start);
  }

  return descriptor;
}

// Links `pinfo` into the singly-linked list [head, tail] so that the list
// stays ordered by ascending `range.start`.
//
// Prepending before the head and appending after the tail are O(1). A page
// that belongs somewhere in the middle is placed by walking the list from the
// head, which is O(list length) for that insertion.
internal void insert_page_info_sorted(Page_Info_Node *&head, Page_Info_Node *&tail, Page_Info_Node *pinfo)
{
  if (!head) {
    assert(!tail);
    head = tail = pinfo;
    return;
  }
  assert(tail);

  if (pinfo->range.start < head->range.start) {
    // New smallest offset: becomes the head.
    pinfo->next = head;
    head = pinfo;
  } else if (pinfo->range.start >= tail->range.start) {
    // New largest offset: becomes the tail.
    tail->next = pinfo;
    tail = pinfo;
  } else {
    // head.start <= pinfo.start < tail.start, so the walk always stops at a
    // node that has a successor (it can never run past the tail).
    Page_Info_Node *prev = head;
    while (prev->next->range.start <= pinfo->range.start)
      prev = prev->next;
    pinfo->next = prev->next;
    prev->next = pinfo;
  }
}

// Collects per-page metadata for the RNTuple described by `info` and stores it
// in `rndata`.
//
// For every page of every column range of every cluster, one Page_Info_Node
// is allocated from `arena` and filled with the page's locator position,
// on-storage size and element count. The nodes form a linked list ordered by
// `range.start`. The list is then partitioned into groups of GROUP_SIZE
// consecutive nodes; each group records its first node and the byte range it
// covers.
//
// Writes rndata.pages, rndata.page_groups, rndata.n_page_groups,
// rndata.n_pages and rndata.n_elems.
internal void gather_metadata(Arena *arena, RMicroFileReader &reader, const RNTuple_File_Info &info,
                              RNTuple_Data &rndata)
{
  using namespace ROOT::Experimental;
  using namespace ROOT::Experimental::Internal;

  RNTupleDescriptor descriptor = create_descriptor(arena, reader, info);

  u64 n_pages = 0;
  u64 n_elems = 0;
  Page_Info_Node *pinfo_head = nullptr, *pinfo_tail = nullptr;

  // for all clusters ...
  for (const RClusterDescriptor &cluster_desc : descriptor.GetClusterIterable()) {
    for (const auto &[col_id, col_range] : cluster_desc.GetColumnRangeIterable()) {
      // TODO gather column metadata
      // TODO gather page metadata
      const auto &page_range = cluster_desc.GetPageRange(col_id);
      for (const auto &page_info : page_range.fPageInfos) {
        Page_Info_Node *pinfo = arena_push(arena);
        pinfo->range.start = page_info.fLocator.GetPosition();
        pinfo->range.len = page_info.fLocator.fBytesOnStorage;
        pinfo->n_elems = page_info.fNElements;

        insert_page_info_sorted(pinfo_head, pinfo_tail, pinfo);

        ++n_pages;
        n_elems += page_info.fNElements;
      }
    }
  }

  fprintf(stderr, "Loaded %lu pages\n", n_pages);

  // Create page groups
  const u64 GROUP_SIZE = 1000;
  Page_Info_Group *groups = arena_push_array_nozero(arena, n_pages / GROUP_SIZE + 1);
  Page_Info_Group *cur_group = groups, *prev_group = nullptr;
  Page_Info_Node *last = nullptr;
  u64 idx = 0;
  u64 n_groups = 0;

  for (Page_Info_Node *pinfo = pinfo_head; pinfo; pinfo = pinfo->next) {
    if (idx++ % GROUP_SIZE == 0) {
      ++n_groups;
      cur_group->first = pinfo;
      cur_group->range.start = pinfo->range.start;
      if (prev_group) {
        // A group extends up to the first byte of the next group. This must be
        // a difference of byte offsets, not of node pointers.
        prev_group->range.len = cur_group->range.start - prev_group->range.start;
        // Only report a group once its length is known; the group array is
        // allocated with the "nozero" variant, so `len` is not readable before.
        printf("group %lu -> %lu\n", prev_group->range.start, prev_group->range.end());
      }
      prev_group = cur_group;
      ++cur_group;
    }
    last = pinfo;
  }

  // The final group extends to the end of the last page in the list.
  if (last) {
    prev_group->range.len = last->range.end() - prev_group->range.start;
    printf("group %lu -> %lu\n", prev_group->range.start, prev_group->range.end());
  }

  rndata.pages = pinfo_head;
  rndata.page_groups = groups;
  rndata.n_page_groups = n_groups;
  rndata.n_pages = n_pages;
  rndata.n_elems = n_elems;
}

// Opens `fname`, looks up the RNTuple called `ntpl_name` and returns an
// RNTuple_Data filled from its anchor, the byte ranges of its header, footer,
// anchor and anchor key, and its page metadata (see gather_metadata).
//
// If the lookup reports failure (`file_info.failed`), the returned struct is
// left value-initialized.
internal RNTuple_Data get_rntuple_data(Arena *arena, const char *fname, const char *ntpl_name)
{
  RNTuple_Data rndata = {};

  // TODO: proper error handling
  RMicroFileReader file_reader { fname };
  RNTuple_File_Info file_info = file_reader.GetNTupleProper(ntpl_name);
  if (file_info.failed)
    return rndata;

  const auto &anchor = file_info.anchor;

  rndata.version.epoch = anchor.fVersionEpoch;
  rndata.version.major = anchor.fVersionMajor;
  rndata.version.minor = anchor.fVersionMinor;
  rndata.version.patch = anchor.fVersionPatch;

  rndata.rng_header.start = anchor.fSeekHeader;
  rndata.rng_header.len = anchor.fNBytesHeader;
  rndata.rng_footer.start = anchor.fSeekFooter;
  rndata.rng_footer.len = anchor.fNBytesFooter;

  rndata.rng_anchor.start = file_info.anchor_seek;
  rndata.rng_anchor.len = file_info.anchor_nbytes;
  rndata.rng_anchor_key.start = file_info.anchor_key_seek;
  rndata.rng_anchor_key.len = file_info.anchor_key_nbytes;

  rndata.rblob_header_size = file_info.rblob_key_header_nbytes;
  rndata.root_file_header_size = file_info.tfile_header_nbytes;

  gather_metadata(arena, file_reader, file_info, rndata);

  return rndata;
}