Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
submodules: recursive

- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@v3
with:
languages: 'cpp'

Expand All @@ -37,6 +37,6 @@ jobs:

# Perform Analysis
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@v3
with:
category: "/language:cpp"
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ endif()
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

if (UNIX AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64"))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mno-avx512f") # disable AVX512
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2 -msse4.2 -mpopcnt")
endif()

Expand Down
2 changes: 1 addition & 1 deletion include/build_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

namespace fulgor {

void build_reference_sketches(index_type const& index,
void build_reference_sketches(hfur_index_t const& index,
uint64_t p, // use 2^p bytes per HLL sketch
uint64_t num_threads, // num. threads for construction
std::string output_filename // where the sketches will be serialized
Expand Down
4 changes: 2 additions & 2 deletions include/builders/differential_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ struct differential_permuter {
essentials::timer<std::chrono::high_resolution_clock, std::chrono::milliseconds> timer;
timer.start();
if (num_points == 0) {
std::cout << "Found empty partition" << endl;
std::cout << "Found empty partition" << std::endl;
clustering_data.num_clusters = 0;
clustering_data.clusters = {};
return 0;
Expand Down Expand Up @@ -206,7 +206,7 @@ struct index<ColorSets>::differential_builder {

const uint32_t num_threads = m_build_config.num_threads;

index_type index;
hfur_index_t index;
essentials::logger("step 1. loading index to be differentiated...");
essentials::load(index, m_build_config.index_filename_to_partition.c_str());
essentials::logger("DONE");
Expand Down
10 changes: 5 additions & 5 deletions include/builders/meta_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct permuter {
permuter(build_configuration const& build_config)
: m_build_config(build_config), m_num_partitions(0), m_max_partition_size(0) {}

void permute(index_type const& index) {
void permute(hfur_index_t const& index) {
essentials::timer<std::chrono::high_resolution_clock, std::chrono::seconds> timer;

{
Expand Down Expand Up @@ -132,7 +132,7 @@ struct index<ColorSets>::meta_builder {
void build(index& idx) {
if (idx.m_k2u.size() != 0) throw std::runtime_error("index already built");

index_type index;
hfur_index_t index;
essentials::logger("step 1. loading index to be partitioned...");
essentials::load(index, m_build_config.index_filename_to_partition.c_str());
essentials::logger("DONE");
Expand All @@ -156,7 +156,7 @@ struct index<ColorSets>::meta_builder {
essentials::logger("step 4. building partial/meta color sets");
timer.start();

atomic_uint64_t num_integers_in_metacolor_sets = 0;
std::atomic_uint64_t num_integers_in_metacolor_sets = 0;
uint64_t num_partial_color_sets = 0;

typename ColorSets::builder color_sets_builder;
Expand Down Expand Up @@ -186,7 +186,7 @@ struct index<ColorSets>::meta_builder {
thread_slices[num_threads] = index.num_color_sets();

auto exe = [&](uint64_t thread_id) {
string tmp_filename = metacolor_set_file_name(thread_id);
std::string tmp_filename = metacolor_set_file_name(thread_id);
uint64_t partition_id = 0;
uint32_t meta_color_set_size = 0;
std::vector<uint32_t> partial_color_set;
Expand Down Expand Up @@ -320,7 +320,7 @@ struct index<ColorSets>::meta_builder {
std::remove(metacolor_set_file_name(thread_id).c_str());

thread_id++;
string tmp_filename = metacolor_set_file_name(thread_id);
std::string tmp_filename = metacolor_set_file_name(thread_id);
metacolor_set_in = std::ifstream(tmp_filename, std::ios::binary);
if (!metacolor_set_in.is_open())
throw std::runtime_error("error in opening file: " + tmp_filename);
Expand Down
6 changes: 3 additions & 3 deletions include/builders/meta_differential_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct index<ColorSets>::meta_differential_builder {
void build(index& idx) {
if (idx.m_k2u.size() != 0) throw std::runtime_error("index already built");

meta_index_type meta_index;
mfur_index_t meta_index;
essentials::logger("step 1. loading index to be partitioned");
essentials::load(meta_index, m_build_config.index_filename_to_partition.c_str());
essentials::logger("DONE");
Expand Down Expand Up @@ -85,7 +85,7 @@ struct index<ColorSets>::meta_differential_builder {
num_partition_colors);
std::vector<std::thread> threads(thread_slices.size());

auto encode_color_sets = [&](uint64_t thread_id) {
auto encode_color_sets = [&thread_builders, &thread_slices, &permutation, &meta_partition, num_partition_colors](uint64_t thread_id) {
auto& color_sets_builder = thread_builders[thread_id];
auto& [begin, end] = thread_slices[thread_id];
color_sets_builder.reserve_num_bits(16 * essentials::GB * 8);
Expand Down Expand Up @@ -357,7 +357,7 @@ struct index<ColorSets>::meta_differential_builder {

auto exe = [&](uint64_t thread_id) {
uint64_t l = slice_size * thread_id;
uint64_t r = min(slice_size * (thread_id + 1), idx.m_k2u.num_contigs());
uint64_t r = std::min(slice_size * (thread_id + 1), idx.m_k2u.num_contigs());

for (uint64_t unitig_id = l; unitig_id < r; ++unitig_id) {
auto it = idx.get_k2u().at_contig_id(unitig_id);
Expand Down
8 changes: 4 additions & 4 deletions include/color_sets/differential.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ struct differential {
prev_val = val;
}
}

}

void append(differential::builder const& db) {
if (db.m_color_set_offsets.size() - 1 == 0) return;
void append(builder const& db) {
// I don't know why the following line was here, but removing it seems to solve bugs
// if (db.m_color_set_offsets.size() - 1 == 0) return;
uint64_t delta = m_bvb.num_bits();
m_bvb.append(db.m_bvb);
m_num_total_integers += db.m_num_total_integers;
Expand Down Expand Up @@ -289,7 +289,7 @@ struct differential {
next_differential_val();
next_representative_val();
}
m_curr_val = min(m_curr_differential_val, m_curr_representative_val);
m_curr_val = std::min(m_curr_differential_val, m_curr_representative_val);
}
};

Expand Down
4 changes: 2 additions & 2 deletions include/color_sets/meta_differential.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct meta_differential {
m_partition_sets_offsets.reserve(num_sets);
}

void process_meta_color_partition_set(vector<uint32_t>& partition_set) {
void process_meta_color_partition_set(std::vector<uint32_t>& partition_set) {
m_curr_partition_set = partition_set;
uint64_t size = partition_set.size();
uint32_t prev_val = partition_set[0];
Expand All @@ -58,7 +58,7 @@ struct meta_differential {
m_prev_docs += d.num_colors();
}

void process_metacolor_set(vector<uint32_t>& relative_colors) {
void process_metacolor_set(std::vector<uint32_t>& relative_colors) {
assert(m_curr_partition_set.size() == relative_colors.size());
m_partition_sets_partitions.push_back(false);

Expand Down
7 changes: 4 additions & 3 deletions include/index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ struct index {
/* from unitig_id to color_set_id */
uint64_t u2c(uint64_t unitig_id) const { return m_u2c_rank1_index.rank1(m_u2c, unitig_id); }

void pseudoalign_full_intersection(std::string const& sequence, //
std::vector<uint32_t>& results) const; //

void fetch_color_set_ids(std::string const& sequence,
std::vector<uint32_t>& color_set_ids) const;
void pseudoalign_full_intersection(std::vector<uint32_t>& color_set_ids, //
std::vector<uint32_t>& results, std::vector<uint32_t>& tmp) const; //
void pseudoalign_threshold_union(std::string const& sequence, //
std::vector<uint32_t>& results, //
const double threshold) const; //
Expand Down
8 changes: 4 additions & 4 deletions include/index_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,29 @@

namespace fulgor {
typedef index<hybrid> hybrid_colors_index_type;
typedef hybrid_colors_index_type index_type; // in use
typedef hybrid_colors_index_type hfur_index_t; // in use
} // namespace fulgor

#include "builders/meta_builder.hpp"
#include "color_sets/meta.hpp"

namespace fulgor {
typedef index<meta<hybrid>> meta_hybrid_colors_index_type;
typedef meta_hybrid_colors_index_type meta_index_type; // in use
typedef meta_hybrid_colors_index_type mfur_index_t; // in use
} // namespace fulgor

#include "builders/differential_builder.hpp"
#include "color_sets/differential.hpp"

namespace fulgor {
typedef index<differential> differential_colors_index_type;
typedef differential_colors_index_type differential_index_type; // in use
typedef differential_colors_index_type dfur_index_t; // in use
} // namespace fulgor

#include "color_sets/meta_differential.hpp"
#include "builders/meta_differential_builder.hpp"

namespace fulgor {
typedef index<meta_differential> meta_differential_colors_index_type;
typedef meta_differential_colors_index_type meta_differential_index_type; // in use
typedef meta_differential_colors_index_type mdfur_index_t; // in use
} // namespace fulgor
55 changes: 51 additions & 4 deletions include/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "external/smhasher/src/City.h"
#include "external/smhasher/src/City.cpp"

#include "external/FQFeeder/include/blockingconcurrentqueue.h"

namespace fulgor {

enum index_t { HYBRID, DIFF, META, META_DIFF };
Expand All @@ -21,10 +23,10 @@ namespace constants {
constexpr double invalid_threshold = -1.0;
constexpr uint64_t default_ram_limit_in_GiB = 8;
static const std::string default_tmp_dirname(".");
static const std::string fulgor_filename_extension("fur");
static const std::string meta_colored_fulgor_filename_extension("mfur");
static const std::string diff_colored_fulgor_filename_extension("dfur");
static const std::string meta_diff_colored_fulgor_filename_extension("mdfur");
static const std::string hfur_filename_extension("fur");
static const std::string mfur_filename_extension("mfur");
static const std::string dfur_filename_extension("dfur");
static const std::string mdfur_filename_extension("mdfur");

namespace current_version_number {
constexpr uint8_t x = 4;
Expand Down Expand Up @@ -215,5 +217,50 @@ struct hasher_uint128_t {
uint64_t operator()(const __uint128_t x) const { return static_cast<uint64_t>(x) ^ (x >> 64); }
};

/* Number of decimal digits needed to print n: 1 for n in [0,9], up to 10 for the largest uint32_t. */
inline int num_digits(const uint32_t n) {
    /* Ascending ladder of guard clauses: each bound is the smallest value
       that needs one more digit than the value returned on that line. */
    if (n < 10) return 1;
    if (n < 100) return 2;
    if (n < 1000) return 3;
    if (n < 10000) return 4;
    if (n < 100000) return 5;
    if (n < 1000000) return 6;
    if (n < 10000000) return 7;
    if (n < 100000000) return 8;
    if (n < 1000000000) return 9;
    return 10;
}

/*
    Serialize vec into s as tab-separated decimal integers,
    e.g. {1, 20, 300} -> "1\t20\t300" (no trailing tab).
    Any previous content of s is discarded; an empty vec yields an empty string.
*/
inline void vec_to_tsv(std::vector<uint32_t> const& vec, std::string& s) {
    s.clear();
    /* Nothing to write. Returning here also keeps the final pop_back() from
       running on an empty string, which is undefined behavior. */
    if (vec.empty()) return;

    /* A uint32_t has at most 10 digits, plus one separator: 12 is a safe per-item bound. */
    s.reserve(vec.size() * 12);

    /* Digits are written right-to-left, ending at buffer[30];
       buffer[31] permanently holds the separator appended after each number. */
    char buffer[32];
    buffer[31] = '\t';
    for (uint32_t x : vec) {
        int len = 0;
        do {
            const uint32_t quotient = x / 10;
            buffer[30 - len++] = static_cast<char>('0' + (x - quotient * 10));
            x = quotient;
        } while (x > 0);
        /* Append the len digits plus the tab that follows them. */
        s.append(buffer + 31 - len, len + 1);
    }
    s.pop_back(); /* drop the tab after the last number */
}

} // namespace util
} // namespace fulgor
37 changes: 18 additions & 19 deletions src/ps_full_intersection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,6 @@ void meta_intersect(std::vector<Iterator>& iterators, std::vector<uint32_t>& col

if (iterators.empty()) return;

for (auto it : iterators) {
while (it.partition_id() != it.num_partitions()) it.next_partition_id();
it.init();
it.change_partition();
}

std::sort(iterators.begin(), iterators.end(), [](auto const& x, auto const& y) {
return x.meta_color_set_size() < y.meta_color_set_size();
});
Expand Down Expand Up @@ -338,10 +332,9 @@ void meta_intersect(std::vector<Iterator>& iterators, std::vector<uint32_t>& col
}

template <typename ColorSets>
void index<ColorSets>::pseudoalign_full_intersection(std::string const& sequence,
std::vector<uint32_t>& colors) const {
void index<ColorSets>::fetch_color_set_ids(std::string const& sequence,
std::vector<uint32_t>& color_set_ids) const {
if (sequence.length() < m_k2u.k()) return;
colors.clear();
std::vector<uint64_t> unitig_ids;

{ /* stream through */
Expand All @@ -362,30 +355,36 @@ void index<ColorSets>::pseudoalign_full_intersection(std::string const& sequence

/* here we use it to hold the color set ids;
in meta_intersect we use it to hold the partition ids */
std::vector<uint32_t> tmp;
std::vector<typename ColorSets::iterator_type> iterators;
color_set_ids.clear();

/* deduplicate unitig_ids */
std::sort(unitig_ids.begin(), unitig_ids.end());
auto end_unitigs = std::unique(unitig_ids.begin(), unitig_ids.end());
tmp.reserve(end_unitigs - unitig_ids.begin());
color_set_ids.reserve(end_unitigs - unitig_ids.begin());
for (auto it = unitig_ids.begin(); it != end_unitigs; ++it) {
uint32_t unitig_id = *it;
uint32_t color_set_id = u2c(unitig_id);
tmp.push_back(color_set_id);
color_set_ids.push_back(color_set_id);
}

/* deduplicate color set ids */
std::sort(tmp.begin(), tmp.end());
auto end_tmp = std::unique(tmp.begin(), tmp.end());
iterators.reserve(end_tmp - tmp.begin());
for (auto it = tmp.begin(); it != end_tmp; ++it) {
uint64_t color_set_id = *it;
std::sort(color_set_ids.begin(), color_set_ids.end());
auto end_tmp = std::unique(color_set_ids.begin(), color_set_ids.end());
color_set_ids.erase(end_tmp, color_set_ids.end());
}

template <typename ColorSets>
void index<ColorSets>::pseudoalign_full_intersection(std::vector<uint32_t>& color_set_ids,
std::vector<uint32_t>& colors, std::vector<uint32_t>& tmp) const {
std::vector<typename ColorSets::iterator_type> iterators;
iterators.reserve(color_set_ids.size());
for (auto color_set_id : color_set_ids) {
auto fwd_it = m_color_sets.color_set(color_set_id);
iterators.push_back(fwd_it);
}

tmp.clear(); // don't need color set ids anymore
colors.clear();
tmp.clear();
if constexpr (ColorSets::type == index_t::META) {
meta_intersect<typename ColorSets::iterator_type, false>(iterators, colors, tmp);
} else if constexpr (ColorSets::type == index_t::META_DIFF) {
Expand Down
Loading
Loading