Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
7d83b94
Get rid of hashing helper header
PointKernel Oct 24, 2025
f3da9ac
Merge remote-tracking branch 'upstream/main'
PointKernel Oct 27, 2025
68b83ad
Add filter_gather_map
PointKernel Oct 28, 2025
2eee15d
Remove redundant file
PointKernel Oct 28, 2025
33de98a
Updates
PointKernel Oct 28, 2025
904b743
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Oct 28, 2025
52a77b6
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Oct 30, 2025
7b77c83
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Oct 30, 2025
c700679
Clean up tests
PointKernel Oct 30, 2025
2cc63d1
More cleanups
PointKernel Oct 30, 2025
bbbc2db
Update throw
PointKernel Oct 30, 2025
401d8ce
Updates
PointKernel Oct 30, 2025
22d5f3d
Fix all tests
PointKernel Oct 30, 2025
bfd0e2c
Updates
PointKernel Oct 31, 2025
2831810
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Oct 31, 2025
afb867d
cleanups
PointKernel Oct 31, 2025
a5899fd
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 17, 2025
5386571
Fix missing include
PointKernel Nov 17, 2025
ea0e711
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 18, 2025
67221bd
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 19, 2025
1172a48
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 20, 2025
dc1abfc
Fix all bugs
PointKernel Nov 20, 2025
825ee12
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 20, 2025
65578ab
Rename as filter_join_indices and clean up tests
PointKernel Nov 21, 2025
90d6955
cleanups
PointKernel Nov 21, 2025
5d61936
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 21, 2025
eae791d
cleanups
PointKernel Nov 21, 2025
27f1742
Cleanups
PointKernel Nov 21, 2025
6102d0b
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 25, 2025
1d90f93
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 25, 2025
62f3e40
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 25, 2025
74be87e
Revert dockerfile change
PointKernel Nov 25, 2025
88a95a9
Update docs
PointKernel Nov 25, 2025
f9286d1
Cleanups
PointKernel Nov 25, 2025
745f11f
Rename + minor cleanups
PointKernel Nov 25, 2025
b499a91
Cleanups
PointKernel Nov 25, 2025
b5a02cc
Remove the make_full_range lambda
PointKernel Nov 25, 2025
c3c15f2
Remove redundant lambda
PointKernel Nov 25, 2025
d28296b
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Nov 25, 2025
e9aad38
Update docs
PointKernel Nov 25, 2025
d6dbb99
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Dec 3, 2025
e8b7cff
Update docs
PointKernel Dec 3, 2025
feb7d18
Simplify inner join validity check
PointKernel Dec 3, 2025
03ebab2
Cleanups + add benchmarks
PointKernel Dec 3, 2025
e4e8193
Minor fix
PointKernel Dec 3, 2025
44b5c97
Merge remote-tracking branch 'upstream/main' into filter-gather-map
PointKernel Dec 3, 2025
6c7ab00
cleanups
PointKernel Dec 3, 2025
773d13b
Update cpp/benchmarks/CMakeLists.txt
PointKernel Dec 3, 2025
0a605c5
support left join
shrshi Dec 4, 2025
aa71e47
partial work
shrshi Dec 4, 2025
16c2dc5
merged main
shrshi Dec 5, 2025
89536a1
fix post processing
shrshi Dec 5, 2025
1ee809c
formatting
shrshi Dec 5, 2025
9e30a09
cleanup
shrshi Dec 5, 2025
d372e91
added comments
shrshi Dec 5, 2025
3bfa588
formatting
shrshi Dec 5, 2025
9ceee22
Merge branch 'main' into smj-left-join
shrshi Dec 5, 2025
a48f491
enable benchmarking for left smj
shrshi Dec 5, 2025
3e7a2c2
Merge branch 'PointKernel-filter-gather-map' into mixed-join-hang-repro
shrshi Dec 5, 2025
f12a456
Merge branch 'smj-left-join' into mixed-join-hang-repro
shrshi Dec 5, 2025
3bf1b08
no hangs after smj replacement
shrshi Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,11 @@ add_library(
src/join/conditional_join.cu
src/join/cross_join.cu
src/join/distinct_hash_join.cu
src/join/filter_join_indices.cu
src/join/filter_join_indices_kernel_complex.cu
src/join/filter_join_indices_kernel_null_complex.cu
src/join/filter_join_indices_kernel_null_primitive.cu
src/join/filter_join_indices_kernel_primitive.cu
src/join/filtered_join.cu
src/join/hash_join.cu
src/join/join.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ ConfigureNVBench(
# * join benchmark --------------------------------------------------------------------------------
ConfigureNVBench(
JOIN_NVBENCH join/left_join.cu join/conditional_join.cu join/join.cu join/mixed_join.cu
join/distinct_join.cu join/multiplicity_join.cu
join/distinct_join.cu join/filter_join_indices.cu join/multiplicity_join.cu
)

# ##################################################################################################
Expand Down
162 changes: 162 additions & 0 deletions cpp/benchmarks/join/filter_join_indices.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#include <benchmarks/join/join_common.hpp>

#include <cudf/join/hash_join.hpp>
#include <cudf/join/join.hpp>

// Count passed to `cycle_dtypes` when each benchmark in this file builds its list of data types.
auto const num_keys = 2;

/**
 * @brief Benchmark body: inner hash join on the equality tables followed by
 * `cudf::filter_join_indices` with `cudf::join_kind::INNER_JOIN`.
 *
 * The join callable handed to `BM_join` builds a `cudf::hash_join` over the right equality
 * table, probes it with the left equality table, and then filters the resulting index pairs with
 * `binary_pred` evaluated over the conditional tables.
 *
 * @tparam Nullable Forwarded to `BM_join`
 * @tparam NullEquality Forwarded to `BM_join`
 * @tparam DataType Data type (or type group) id used to build the list of input data types
 *
 * @param state NVBench state forwarded to `BM_join`
 */
template <bool Nullable, cudf::null_equality NullEquality, data_type DataType>
void nvbench_filter_join_indices_inner_join(nvbench::state& state,
                                            nvbench::type_list<nvbench::enum_type<Nullable>,
                                                               nvbench::enum_type<NullEquality>,
                                                               nvbench::enum_type<DataType>>)
{
  auto key_types = cycle_dtypes(get_type_or_group(static_cast<int32_t>(DataType)), num_keys);

  auto hash_then_filter = [](cudf::table_view const& left_keys,
                             cudf::table_view const& right_keys,
                             cudf::table_view const& left_payload,
                             cudf::table_view const& right_payload,
                             cudf::ast::operation predicate,
                             cudf::null_equality nulls_equal) {
    // Equality phase: the hash table is built on the right side and probed with the left side.
    cudf::hash_join joiner(right_keys, nulls_equal);
    auto matches = joiner.inner_join(left_keys);

    // Conditional phase: keep only the matched pairs that satisfy the predicate.
    cudf::device_span<cudf::size_type const> const left_map(*matches.first);
    cudf::device_span<cudf::size_type const> const right_map(*matches.second);
    return cudf::filter_join_indices(
      left_payload, right_payload, left_map, right_map, predicate, cudf::join_kind::INNER_JOIN);
  };

  BM_join<Nullable, join_t::MIXED, NullEquality>(state, key_types, hash_then_filter);
}

/**
 * @brief Benchmark body: inner hash join followed by `cudf::filter_join_indices` using a
 * multi-level AST predicate.
 *
 * The depth of the predicate is read from the `ast_levels` int64 axis of `state`. The join
 * callable ignores the predicate supplied by `BM_join`; instead it builds its own expression
 * with `create_complex_ast_expression` and filters with the last expression of that tree.
 *
 * @tparam Nullable Forwarded to `BM_join`
 * @tparam NullEquality Forwarded to `BM_join`
 * @tparam DataType Data type (or type group) id used to build the list of input data types
 *
 * @param state NVBench state; provides the `ast_levels` axis and is forwarded to `BM_join`
 */
template <bool Nullable, cudf::null_equality NullEquality, data_type DataType>
void nvbench_filter_join_indices_inner_join_complex_ast(
  nvbench::state& state,
  nvbench::type_list<nvbench::enum_type<Nullable>,
                     nvbench::enum_type<NullEquality>,
                     nvbench::enum_type<DataType>>)
{
  auto const num_levels = static_cast<cudf::size_type>(state.get_int64("ast_levels"));
  auto key_types = cycle_dtypes(get_type_or_group(static_cast<int32_t>(DataType)), num_keys);

  auto hash_then_filter = [num_levels](cudf::table_view const& left_keys,
                                       cudf::table_view const& right_keys,
                                       cudf::table_view const& left_payload,
                                       cudf::table_view const& right_payload,
                                       cudf::ast::operation /* predicate from BM_join, unused */,
                                       cudf::null_equality nulls_equal) {
    // Build the replacement predicate; the tree owns every node of the expression.
    cudf::ast::tree predicate_tree;
    create_complex_ast_expression(predicate_tree, num_levels);

    // Equality phase: the hash table is built on the right side and probed with the left side.
    cudf::hash_join joiner(right_keys, nulls_equal);
    auto matches = joiner.inner_join(left_keys);

    // Conditional phase: the root of the generated expression is the last node pushed.
    cudf::device_span<cudf::size_type const> const left_map(*matches.first);
    cudf::device_span<cudf::size_type const> const right_map(*matches.second);
    return cudf::filter_join_indices(left_payload,
                                     right_payload,
                                     left_map,
                                     right_map,
                                     predicate_tree.back(),
                                     cudf::join_kind::INNER_JOIN);
  };

  BM_join<Nullable, join_t::MIXED, NullEquality>(state, key_types, hash_then_filter);
}

/**
 * @brief Benchmark body: left hash join on the equality tables followed by
 * `cudf::filter_join_indices` with `cudf::join_kind::LEFT_JOIN`.
 *
 * The join callable handed to `BM_join` builds a `cudf::hash_join` over the right equality
 * table, runs `left_join` against the left equality table, and then filters the resulting index
 * pairs with `binary_pred` evaluated over the conditional tables.
 *
 * @tparam Nullable Forwarded to `BM_join`
 * @tparam NullEquality Forwarded to `BM_join`
 * @tparam DataType Data type (or type group) id used to build the list of input data types
 *
 * @param state NVBench state forwarded to `BM_join`
 */
template <bool Nullable, cudf::null_equality NullEquality, data_type DataType>
void nvbench_filter_join_indices_left_join(nvbench::state& state,
                                           nvbench::type_list<nvbench::enum_type<Nullable>,
                                                              nvbench::enum_type<NullEquality>,
                                                              nvbench::enum_type<DataType>>)
{
  auto key_types = cycle_dtypes(get_type_or_group(static_cast<int32_t>(DataType)), num_keys);

  auto hash_then_filter = [](cudf::table_view const& left_keys,
                             cudf::table_view const& right_keys,
                             cudf::table_view const& left_payload,
                             cudf::table_view const& right_payload,
                             cudf::ast::operation predicate,
                             cudf::null_equality nulls_equal) {
    // Equality phase: the hash table is built on the right side and probed with the left side.
    cudf::hash_join joiner(right_keys, nulls_equal);
    auto matches = joiner.left_join(left_keys);

    // Conditional phase: filter the index pairs using left-join semantics.
    cudf::device_span<cudf::size_type const> const left_map(*matches.first);
    cudf::device_span<cudf::size_type const> const right_map(*matches.second);
    return cudf::filter_join_indices(
      left_payload, right_payload, left_map, right_map, predicate, cudf::join_kind::LEFT_JOIN);
  };

  BM_join<Nullable, join_t::MIXED, NullEquality>(state, key_types, hash_then_filter);
}

/**
 * @brief Benchmark body: full hash join on the equality tables followed by
 * `cudf::filter_join_indices` with `cudf::join_kind::FULL_JOIN`.
 *
 * The join callable handed to `BM_join` builds a `cudf::hash_join` over the right equality
 * table, runs `full_join` against the left equality table, and then filters the resulting index
 * pairs with `binary_pred` evaluated over the conditional tables.
 *
 * @tparam Nullable Forwarded to `BM_join`
 * @tparam NullEquality Forwarded to `BM_join`
 * @tparam DataType Data type (or type group) id used to build the list of input data types
 *
 * @param state NVBench state forwarded to `BM_join`
 */
template <bool Nullable, cudf::null_equality NullEquality, data_type DataType>
void nvbench_filter_join_indices_full_join(nvbench::state& state,
                                           nvbench::type_list<nvbench::enum_type<Nullable>,
                                                              nvbench::enum_type<NullEquality>,
                                                              nvbench::enum_type<DataType>>)
{
  auto key_types = cycle_dtypes(get_type_or_group(static_cast<int32_t>(DataType)), num_keys);

  auto hash_then_filter = [](cudf::table_view const& left_keys,
                             cudf::table_view const& right_keys,
                             cudf::table_view const& left_payload,
                             cudf::table_view const& right_payload,
                             cudf::ast::operation predicate,
                             cudf::null_equality nulls_equal) {
    // Equality phase: the hash table is built on the right side and probed with the left side.
    cudf::hash_join joiner(right_keys, nulls_equal);
    auto matches = joiner.full_join(left_keys);

    // Conditional phase: filter the index pairs using full-join semantics.
    cudf::device_span<cudf::size_type const> const left_map(*matches.first);
    cudf::device_span<cudf::size_type const> const right_map(*matches.second);
    return cudf::filter_join_indices(
      left_payload, right_payload, left_map, right_map, predicate, cudf::join_kind::FULL_JOIN);
  };

  BM_join<Nullable, join_t::MIXED, NullEquality>(state, key_types, hash_then_filter);
}

// Benchmark registrations. Every benchmark shares the same three type axes
// (Nullable, NullEquality, DataType) and sweeps `left_size` / `right_size` over JOIN_SIZE_RANGE.

// Inner join + filter_join_indices with the predicate supplied by BM_join.
NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_inner_join,
NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE,
DEFAULT_JOIN_NULL_EQUALITY,
DEFAULT_JOIN_DATATYPES))
.set_name("filter_join_indices_inner_join")
.set_type_axes_names({"Nullable", "NullEquality", "DataType"})
.add_int64_axis("left_size", JOIN_SIZE_RANGE)
.add_int64_axis("right_size", JOIN_SIZE_RANGE);

// Inner join + filter_join_indices with a generated predicate; the extra `ast_levels` axis is
// read by the benchmark body and passed to create_complex_ast_expression.
NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_inner_join_complex_ast,
NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE,
DEFAULT_JOIN_NULL_EQUALITY,
DEFAULT_JOIN_DATATYPES))
.set_name("filter_join_indices_inner_join_complex_ast")
.set_type_axes_names({"Nullable", "NullEquality", "DataType"})
.add_int64_axis("left_size", JOIN_SIZE_RANGE)
.add_int64_axis("right_size", JOIN_SIZE_RANGE)
.add_int64_axis("ast_levels", {1, 5, 10});

// Left join + filter_join_indices.
NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_left_join,
NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE,
DEFAULT_JOIN_NULL_EQUALITY,
DEFAULT_JOIN_DATATYPES))
.set_name("filter_join_indices_left_join")
.set_type_axes_names({"Nullable", "NullEquality", "DataType"})
.add_int64_axis("left_size", JOIN_SIZE_RANGE)
.add_int64_axis("right_size", JOIN_SIZE_RANGE);

// Full join + filter_join_indices.
NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_full_join,
NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE,
DEFAULT_JOIN_NULL_EQUALITY,
DEFAULT_JOIN_DATATYPES))
.set_name("filter_join_indices_full_join")
.set_type_axes_names({"Nullable", "NullEquality", "DataType"})
.add_int64_axis("left_size", JOIN_SIZE_RANGE)
.add_int64_axis("right_size", JOIN_SIZE_RANGE);
23 changes: 19 additions & 4 deletions cpp/benchmarks/join/join_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@
#include <benchmarks/join/nvbench_helpers.hpp>

#include <cudf/ast/expressions.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/filling.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <nvbench/nvbench.cuh>

Expand All @@ -42,6 +38,25 @@ using JOIN_NULL_EQUALITY =
using DEFAULT_JOIN_DATATYPES = nvbench::enum_type_list<data_type::INT32>;
using DEFAULT_JOIN_NULL_EQUALITY = nvbench::enum_type_list<cudf::null_equality::UNEQUAL>;

/**
 * @brief Appends a chain of repeated equality comparisons to `tree`.
 *
 * Pushes a reference to column 0 of the left table and a reference to column 0 of the right
 * table, then `ast_levels` `EQUAL` operations over `tree.at(0)` and `tree.at(1)`. Every
 * comparison after the first is combined with the previously pushed operation through
 * `LOGICAL_AND`, so the complete expression is the last node pushed (`tree.back()`).
 *
 * @note The operands are looked up by index (`tree.at(0)` / `tree.at(1)`), so this presumably
 * expects `tree` to be empty on entry -- TODO confirm against callers.
 *
 * @param tree AST tree that receives the generated expressions
 * @param ast_levels Number of `EQUAL` comparisons to chain; must be greater than 0
 */
inline void create_complex_ast_expression(cudf::ast::tree& tree, cudf::size_type ast_levels)
{
  CUDF_EXPECTS(ast_levels > 0, "Number of AST levels must be greater than 0");

  using cudf::ast::ast_operator;

  // Operands shared by every comparison.
  tree.push(cudf::ast::column_reference(0));
  tree.push(cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT));

  // Level 1: a single comparison.
  tree.push(cudf::ast::operation(ast_operator::EQUAL, tree.at(0), tree.at(1)));

  // Levels 2..ast_levels: push another comparison and AND it with the expression built so far.
  // Nothing is added here when ast_levels == 1.
  for (auto remaining = ast_levels - 1; remaining > 0; --remaining) {
    tree.push(cudf::ast::operation(ast_operator::EQUAL, tree.at(0), tree.at(1)));

    auto const newest = tree.size() - 1;
    tree.push(
      cudf::ast::operation(ast_operator::LOGICAL_AND, tree.at(newest - 1), tree.at(newest)));
  }
}

template <bool Nullable,
join_t join_type = join_t::HASH,
cudf::null_equality compare_nulls = cudf::null_equality::UNEQUAL,
Expand Down
24 changes: 0 additions & 24 deletions cpp/benchmarks/join/mixed_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,6 @@

auto const num_keys = 2;

/**
 * @brief Appends a chain of repeated equality comparisons to `tree`.
 *
 * For mixed joins the conditional tables only have one column each (column 0), so the same
 * column pair is compared repeatedly to stress AST evaluation. The result has the shape
 * `(col0_L == col0_R) && (col0_L == col0_R) && ...` and contains `2 * ast_levels - 1` operators:
 * `ast_levels` `EQUAL` plus `ast_levels - 1` `LOGICAL_AND`. The complete expression is the last
 * node pushed (`tree.back()`).
 *
 * @param tree AST tree that receives the generated expressions
 * @param ast_levels Number of `EQUAL` comparisons to chain; must be greater than 0
 */
void create_complex_ast_expression(cudf::ast::tree& tree, cudf::size_type ast_levels)
{
  CUDF_EXPECTS(ast_levels > 0, "Number of AST levels must be greater than 0");

  using cudf::ast::ast_operator;

  // Operands shared by every comparison: column 0 of the left and of the right table.
  tree.push(cudf::ast::column_reference(0));
  tree.push(cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT));

  // Level 1: a single comparison.
  tree.push(cudf::ast::operation(ast_operator::EQUAL, tree.at(0), tree.at(1)));

  // Each further level adds one comparison and ANDs it with the expression built so far.
  // Nothing is added here when ast_levels == 1.
  for (auto remaining = ast_levels - 1; remaining > 0; --remaining) {
    tree.push(cudf::ast::operation(ast_operator::EQUAL, tree.at(0), tree.at(1)));

    auto const newest = tree.size() - 1;
    tree.push(
      cudf::ast::operation(ast_operator::LOGICAL_AND, tree.at(newest - 1), tree.at(newest)));
  }
}

template <bool Nullable, cudf::null_equality NullEquality, data_type DataType>
void nvbench_mixed_inner_join(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<Nullable>,
Expand Down
26 changes: 20 additions & 6 deletions cpp/benchmarks/join/multiplicity_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,34 @@ void nvbench_hm_left_join(nvbench::state& state,
nvbench::enum_type<DataType>,
nvbench::enum_type<Algorithm>>)
{
if constexpr (Algorithm == join_t::SORT_MERGE) {
state.skip("Left join using sort-merge algorithm not yet implemented");
if constexpr (not Nullable && NullEquality == cudf::null_equality::UNEQUAL) {
state.skip(
"Since the keys are not nullable, how null entries are to be compared by the join algorithm "
"is immaterial. Therefore, we skip running the benchmark when null equality is set to "
"UNEQUAL since the performance numbers will be the same as when null equality is set to "
"EQUAL.");
return;
}
auto const multiplicity = static_cast<cudf::size_type>(state.get_int64("multiplicity"));
auto const num_keys = state.get_int64("num_keys");
auto dtypes = cycle_dtypes(get_type_or_group(static_cast<int32_t>(DataType)), num_keys);

auto join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls) {
auto hash_join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls) {
return cudf::left_join(left_input, right_input, compare_nulls);
};
BM_join<Nullable, Algorithm, NullEquality>(state, dtypes, join, multiplicity);
auto sort_merge_join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls) {
auto smj = cudf::sort_merge_join(right_input, cudf::sorted::NO, compare_nulls);
return smj.left_join(left_input, cudf::sorted::NO);
};
if constexpr (Algorithm == join_t::HASH) {
BM_join<Nullable, Algorithm, NullEquality>(state, dtypes, hash_join, multiplicity);
} else if constexpr (Algorithm == join_t::SORT_MERGE) {
BM_join<Nullable, Algorithm, NullEquality>(state, dtypes, sort_merge_join, multiplicity);
}
}

template <bool Nullable, cudf::null_equality NullEquality, data_type DataType, join_t Algorithm>
Expand Down
Loading