diff --git a/include/essentials.hpp b/include/essentials.hpp index e1e9aff..18bf1df 100644 --- a/include/essentials.hpp +++ b/include/essentials.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -120,14 +121,81 @@ static void save_pod(std::ostream& os, T const& val) { os.write(reinterpret_cast(&val), sizeof(T)); } -template -static void save_vec(std::ostream& os, std::vector const& vec) { - static_assert(is_pod::value); - size_t n = vec.size(); - save_pod(os, n); - os.write(reinterpret_cast(vec.data()), - static_cast(sizeof(T) * n)); -} + + +/* + A read-only span with optional shared ownership. + After construction, only const access is permitted. + + Three ownership models via shared_ptr's aliasing constructor: + 1. Heap-owned: constructed from an rvalue contiguous range (e.g. vector) — + the range is heap-allocated inside a shared_ptr, and the span points + into its buffer. + 2. Externally-owned: constructed from a raw pointer + shared_ptr owner + (e.g., an mmap context) — the span keeps the owner alive. + 3. Unowned: constructed from a raw pointer without owner — the caller + must ensure the backing memory outlives the span. + + All models yield the same branch-free const T* access path. +*/ +template +class owning_span { + std::shared_ptr m_data; + size_t m_size = 0; + + template + using has_contiguous_data = std::enable_if_t< + !std::is_same_v, owning_span> && + std::is_convertible_v&>().data()), const T*> && + std::is_convertible_v&>().size()), size_t>>; + +public: + using value_type = T; + using size_type = size_t; + using const_iterator = const T*; + + owning_span() = default; + + /* Take ownership of any contiguous range (vector, array, string, ...). + Rvalues are moved; lvalues are copied. 
An empty range stores nothing: the span stays null with size 0. */ + template > + owning_span(Range&& r) { + if (r.size() == 0) return; + auto p = std::make_shared>(std::forward(r)); + m_size = p->size(); + const T* ptr = p->data(); + m_data = std::shared_ptr(std::move(p), ptr); + } + + /* View into externally-managed memory, optionally keeping owner alive. Initializers follow member declaration order (m_data, then m_size). */ + owning_span(const T* data, size_t n, + std::shared_ptr owner = {}) + : m_data(std::move(owner), data) + , m_size(n) {} + + const T* data() const { return m_data.get(); } + size_t size() const { return m_size; } + bool empty() const { return m_size == 0; } + const T& operator[](size_t i) const { return m_data[i]; } + /* Neither front() nor back() checks for an empty span. */ const T& front() const { return m_data[0]; } + const T& back() const { return m_data[m_size - 1]; } + const_iterator begin() const { return data(); } + const_iterator end() const { return data() + m_size; } + + void swap(owning_span& other) { + m_data.swap(other.m_data); + std::swap(m_size, other.m_size); + } + + void clear() { m_data.reset(); m_size = 0; } +}; + +template +struct is_owning_span : std::false_type {}; +template +struct is_owning_span> : std::true_type {}; +template +inline constexpr bool is_owning_span_v = is_owning_span::value; struct json_lines { struct property { @@ -282,7 +350,16 @@ struct generic_loader { generic_loader(std::istream& is) : m_num_bytes_pods(0) , m_num_bytes_vecs_of_pods(0) - , m_is(is) {} + , m_is(is) + , m_mmap_base(nullptr) + , m_mmap_size(0) {} + + /* Record the mapped region and an optional owner; is_mmap() reports true once base is non-null. */ void set_mmap(const uint8_t* base, size_t size, + std::shared_ptr owner = {}) { + m_mmap_base = base; + m_mmap_size = size; + m_mmap_owner = std::move(owner); + } template void visit(T& val) { @@ -295,18 +372,10 @@ struct generic_loader { } template - void visit(std::vector& vec) { - size_t n; - visit(n); - vec.resize(n); - if constexpr (is_pod::value) { - m_is.read(reinterpret_cast(vec.data()), - static_cast(sizeof(T) * n)); - m_num_bytes_vecs_of_pods += n * sizeof(T); - } else { - for (auto& v : vec) visit(v); - } - } + void visit(std::vector& vec) { visit_seq(vec); } + + 
template + void visit(owning_span& vec) { visit_seq(vec); } size_t bytes() { return m_is.tellg(); @@ -320,10 +389,42 @@ struct generic_loader { return m_num_bytes_vecs_of_pods; } + bool is_mmap() const { return m_mmap_base != nullptr; } + private: size_t m_num_bytes_pods; size_t m_num_bytes_vecs_of_pods; std::istream& m_is; + const uint8_t* m_mmap_base; + size_t m_mmap_size; + std::shared_ptr m_mmap_owner; + + /* Read a length-prefixed sequence into vec. When vec is an owning_span and a mapping is set, the span aliases m_mmap_base at the stream's current offset, shares m_mmap_owner, and the stream is seeked past the payload; otherwise the elements are read into a temporary vector that is moved into vec. NOTE(review): m_mmap_size is stored but not read in any visible hunk; confirm the mapped range is bounds-checked elsewhere. */ template + void visit_seq(Vec& vec) { + using T = typename Vec::value_type; + size_t n; + visit(n); + if constexpr (is_owning_span_v) { + if (is_mmap()) { + assert(is_pod::value); + auto offset = static_cast(m_is.tellg()); + vec = Vec(reinterpret_cast(m_mmap_base + offset), n, + m_mmap_owner); + m_is.seekg(static_cast(offset + n * sizeof(T))); + m_num_bytes_vecs_of_pods += n * sizeof(T); + return; + } + } + std::vector tmp(n); + if constexpr (is_pod::value) { + m_is.read(reinterpret_cast(tmp.data()), + static_cast(sizeof(T) * n)); + m_num_bytes_vecs_of_pods += n * sizeof(T); + } else { + for (auto& v : tmp) visit(v); + } + vec = Vec(std::move(tmp)); + } }; struct loader : generic_loader { @@ -355,15 +456,10 @@ struct generic_saver { } template - void visit(std::vector const& vec) { - if constexpr (is_pod::value) { - save_vec(m_os, vec); - } else { - size_t n = vec.size(); - visit(n); - for (auto& v : vec) visit(v); - } - } + void visit(std::vector const& vec) { visit_seq(vec); } + + template + void visit(owning_span const& vec) { visit_seq(vec); } size_t bytes() { return m_os.tellp(); @@ -371,6 +467,19 @@ struct generic_saver { private: std::ostream& m_os; + + /* Write the element count, then the raw bytes for POD element types or each element through visit() otherwise. */ template + void visit_seq(Vec const& vec) { + using T = typename Vec::value_type; + size_t n = vec.size(); + visit(n); + if constexpr (is_pod::value) { + m_os.write(reinterpret_cast(vec.data()), + static_cast(sizeof(T) * n)); + } else { + for (auto const& v : vec) visit(v); + } + } }; struct saver : generic_saver { @@ -425,25 +534,10 @@ struct sizer { } template - void visit(std::vector& vec) { - if 
constexpr (is_pod::value) { - node n(vec_bytes(vec), m_current->depth + 1, demangle(typeid(std::vector).name())); - m_current->children.push_back(n); - m_current->bytes += n.bytes; - } else { - size_t n = vec.size(); - m_current->bytes += pod_bytes(n); - node* parent = m_current; - for (auto& v : vec) { - node n(0, parent->depth + 1, demangle(typeid(T).name())); - parent->children.push_back(n); - m_current = &parent->children.back(); - visit(v); - parent->bytes += m_current->bytes; - } - m_current = parent; - } - } + void visit(std::vector& vec) { visit_seq(vec); } + + template + void visit(owning_span& vec) { visit_seq(vec); } template void print(node const& n, size_t total_bytes, Device& device) const { @@ -468,6 +562,28 @@ struct sizer { private: node m_root; node* m_current; + + /* Account for a sequence: a POD sequence becomes one child node sized by vec_bytes(); otherwise the length prefix is added to the current node and each element is visited under its own child node. */ template + void visit_seq(Vec& vec) { + using T = typename Vec::value_type; + if constexpr (is_pod::value) { + node n(vec_bytes(vec), m_current->depth + 1, demangle(typeid(Vec).name())); + m_current->children.push_back(n); + m_current->bytes += n.bytes; + } else { + size_t n = vec.size(); + m_current->bytes += pod_bytes(n); + node* parent = m_current; + for (auto& v : vec) { + node nd(0, parent->depth + 1, demangle(typeid(T).name())); + parent->children.push_back(nd); + m_current = &parent->children.back(); + visit(v); + parent->bytes += m_current->bytes; + } + m_current = parent; + } + } }; template @@ -539,6 +655,17 @@ struct contiguous_memory_allocator { } } + /* Load a length-prefixed owning_span by reading into a temporary vector that the span then takes over. NOTE(review): consume() is defined outside this hunk; confirm advancing it is intended although the elements live in the vector's own buffer. */ template + void visit(owning_span& vec) { + size_t n; + load_pod(m_is, n); + std::vector tmp(n); + m_is.read(reinterpret_cast(tmp.data()), + static_cast(sizeof(T) * n)); + consume(n * sizeof(T)); + vec = owning_span(std::move(tmp)); + } + uint8_t* end() { return m_end; }