diff --git a/isthmus/src/main/java/io/substrait/isthmus/AggregateFunctions.java b/isthmus/src/main/java/io/substrait/isthmus/AggregateFunctions.java index 6cba80781..0d5d5bf0e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/AggregateFunctions.java +++ b/isthmus/src/main/java/io/substrait/isthmus/AggregateFunctions.java @@ -11,31 +11,35 @@ import org.apache.calcite.sql.fun.SqlSumEmptyIsZeroAggFunction; import org.apache.calcite.sql.type.ReturnTypes; +/** + * Provides Substrait-specific variants of Calcite aggregate functions to ensure type inference + * matches Substrait expectations. + * + *

Default Calcite implementations may infer return types that differ from Substrait, causing + * conversion issues. This class overrides those behaviors. + */ public class AggregateFunctions { - // For some arithmetic aggregate functions, the default Calcite aggregate function implementations - // will infer return types that differ from those expected by Substrait. - // This type mismatch can cause conversion and planning failures. - + /** Substrait-specific MIN aggregate function (nullable return type). */ public static SqlAggFunction MIN = new SubstraitSqlMinMaxAggFunction(SqlKind.MIN); + + /** Substrait-specific MAX aggregate function (nullable return type). */ public static SqlAggFunction MAX = new SubstraitSqlMinMaxAggFunction(SqlKind.MAX); + + /** Substrait-specific AVG aggregate function (nullable return type). */ public static SqlAggFunction AVG = new SubstraitAvgAggFunction(SqlKind.AVG); + + /** Substrait-specific SUM aggregate function (nullable return type). */ public static SqlAggFunction SUM = new SubstraitSumAggFunction(); + + /** Substrait-specific SUM0 aggregate function (non-null BIGINT return type). */ public static SqlAggFunction SUM0 = new SubstraitSumEmptyIsZeroAggFunction(); /** - * Some Calcite rules, like {@link - * org.apache.calcite.rel.rules.AggregateExpandDistinctAggregatesRule}, introduce the default - * Calcite aggregate functions into plans. - * - *

When converting these Calcite plans to Substrait, we need to convert the default Calcite - * aggregate calls to the Substrait specific variants. - * - *

This function attempts to convert the given {@code aggFunction} to its Substrait equivalent + * Converts default Calcite aggregate functions to Substrait-specific variants when needed. * - * @param aggFunction the {@link SqlAggFunction} to convert to a Substrait specific variant - * @return an optional containing the Substrait equivalent of the given {@code aggFunction} if - * conversion was needed, empty otherwise. + * @param aggFunction the Calcite aggregate function + * @return optional containing Substrait equivalent if conversion applies */ public static Optional toSubstraitAggVariant(SqlAggFunction aggFunction) { if (aggFunction instanceof SqlMinMaxAggFunction) { @@ -53,7 +57,7 @@ public static Optional toSubstraitAggVariant(SqlAggFunction aggF } } - /** Extension of {@link SqlMinMaxAggFunction} that ALWAYS infers a nullable return type */ + /** Substrait variant of {@link SqlMinMaxAggFunction} that forces nullable return type. */ private static class SubstraitSqlMinMaxAggFunction extends SqlMinMaxAggFunction { public SubstraitSqlMinMaxAggFunction(SqlKind kind) { super(kind); @@ -65,12 +69,10 @@ public RelDataType inferReturnType(SqlOperatorBinding opBinding) { } } - /** Extension of {@link SqlSumAggFunction} that ALWAYS infers a nullable return type */ + /** Substrait variant of {@link SqlSumAggFunction} that forces nullable return type. */ private static class SubstraitSumAggFunction extends SqlSumAggFunction { public SubstraitSumAggFunction() { - // This is intentionally null - // See the instantiation of SqlSumAggFunction in SqlStdOperatorTable - super(null); + super(null); // Matches Calcite's instantiation pattern } @Override @@ -79,7 +81,7 @@ public RelDataType inferReturnType(SqlOperatorBinding opBinding) { } } - /** Extension of {@link SqlAvgAggFunction} that ALWAYS infers a nullable return type */ + /** Substrait variant of {@link SqlAvgAggFunction} that forces nullable return type. 
*/ private static class SubstraitAvgAggFunction extends SqlAvgAggFunction { public SubstraitAvgAggFunction(SqlKind kind) { super(kind); @@ -92,8 +94,8 @@ public RelDataType inferReturnType(SqlOperatorBinding opBinding) { } /** - * Extension of {@link SqlSumEmptyIsZeroAggFunction} that ALWAYS infers a NOT NULL BIGINT return - * type + * Substrait variant of {@link SqlSumEmptyIsZeroAggFunction} that forces BIGINT return type and + * uses a user-friendly name. */ private static class SubstraitSumEmptyIsZeroAggFunction extends org.apache.calcite.sql.fun.SqlSumEmptyIsZeroAggFunction { @@ -103,8 +105,7 @@ public SubstraitSumEmptyIsZeroAggFunction() { @Override public String getName() { - // the default name for this function is `$sum0` - // override this to `sum0` which is a nicer name to use in queries + // Override default `$sum0` with `sum0` for readability return "sum0"; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/CallConverter.java b/isthmus/src/main/java/io/substrait/isthmus/CallConverter.java index 8d68ef612..bc1f465c5 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/CallConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/CallConverter.java @@ -6,7 +6,22 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; +/** + * Functional interface for converting Calcite {@link RexCall} expressions into Substrait {@link + * Expression}s. + * + *

Implementations should return an {@link Optional} containing the converted expression, or + * {@link Optional#empty()} if the call is not handled. + */ @FunctionalInterface public interface CallConverter { + + /** + * Converts a Calcite {@link RexCall} into a Substrait {@link Expression}. + * + * @param call the Calcite function/operator call to convert + * @param topLevelConverter a function for converting nested {@link RexNode} operands + * @return an {@link Optional} containing the converted expression, or empty if not applicable + */ Optional convert(RexCall call, Function topLevelConverter); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java index 377020bb3..ba273f0a6 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java +++ b/isthmus/src/main/java/io/substrait/isthmus/ExtensionUtils.java @@ -7,6 +7,12 @@ import java.util.Set; import java.util.stream.Collectors; +/** + * Utility methods for working with Substrait extensions. + * + *

Provides helpers to identify and extract dynamic (custom/user-defined) functions from an + * {@link io.substrait.extension.SimpleExtension.ExtensionCollection}. + */ public class ExtensionUtils { /** diff --git a/isthmus/src/main/java/io/substrait/isthmus/OuterReferenceResolver.java b/isthmus/src/main/java/io/substrait/isthmus/OuterReferenceResolver.java index eeb645175..64e67e878 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/OuterReferenceResolver.java +++ b/isthmus/src/main/java/io/substrait/isthmus/OuterReferenceResolver.java @@ -15,8 +15,20 @@ import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil.SubQueryCollector; -/** Resolve correlated variable and get Depth map for RexFieldAccess */ -// See OuterReferenceResolver.md for explanation how the Depth map is computed. +/** + * Resolve correlated variables and compute a depth map for {@link RexFieldAccess}. + * + *

Traverses a {@link RelNode} tree and: + * + *

+ * + * See OuterReferenceResolver.md for details on how the depth map is computed. + */ public class OuterReferenceResolver extends RelNodeVisitor { private final Map nestedDepth; @@ -24,23 +36,50 @@ public class OuterReferenceResolver extends RelNodeVisitor(); fieldAccessDepthMap = new IdentityHashMap<>(); } + /** + * Returns the number of "steps out" (nesting depth) for a given {@link RexFieldAccess}. + * + * @param fieldAccess the field access referencing a {@link RexCorrelVariable} + * @return the number of outer scopes between the access and its correlation source + * @throws NullPointerException if {@code fieldAccess} has no entry in the depth map + */ public int getStepsOut(RexFieldAccess fieldAccess) { return fieldAccessDepthMap.get(fieldAccess); } + /** + * Applies the resolver to a {@link RelNode} tree, computing the depth map. + * + * @param r the root relational node + * @return the same node after traversal + * @throws RuntimeException if the visitor encounters an unrecoverable condition + */ public RelNode apply(RelNode r) { return reverseAccept(r); } + /** + * Returns the computed map from {@link RexFieldAccess} to depth (steps out). + * + * @return map of field access to depth + */ public Map getFieldAccessDepthMap() { return fieldAccessDepthMap; } + /** + * Visits a {@link Filter}, registering any correlation variables and visiting its condition. + * + * @param filter the filter node + * @return the result of {@link RelNodeVisitor#visit(Filter)} + * @throws RuntimeException if traversal fails + */ @Override public RelNode visit(Filter filter) throws RuntimeException { for (CorrelationId id : filter.getVariablesSet()) { @@ -50,6 +89,16 @@ public RelNode visit(Filter filter) throws RuntimeException { return super.visit(filter); } + /** + * Visits a {@link Correlate}, handling correlation depth for both sides. + * + *

Special case: the right side is a correlated subquery in the rel tree (not a REX), so we + * manually adjust depth before/after visiting it. + * + * @param correlate the correlate (correlated join) node + * @return the correlate node + * @throws RuntimeException if traversal fails + */ @Override public RelNode visit(Correlate correlate) throws RuntimeException { for (CorrelationId id : correlate.getVariablesSet()) { @@ -70,6 +119,13 @@ public RelNode visit(Correlate correlate) throws RuntimeException { return correlate; } + /** + * Visits a generic {@link RelNode}, applying traversal to all inputs. + * + * @param other the node to visit + * @return the node + * @throws RuntimeException if traversal fails + */ @Override public RelNode visitOther(RelNode other) throws RuntimeException { for (RelNode child : other.getInputs()) { @@ -78,6 +134,14 @@ public RelNode visitOther(RelNode other) throws RuntimeException { return other; } + /** + * Visits a {@link Project}, registering correlation variables and visiting any subqueries within + * its expressions. + * + * @param project the project node + * @return the result of {@link RelNodeVisitor#visit(Project)} + * @throws RuntimeException if traversal fails + */ @Override public RelNode visit(Project project) throws RuntimeException { for (CorrelationId id : project.getVariablesSet()) { @@ -91,13 +155,25 @@ public RelNode visit(Project project) throws RuntimeException { return super.visit(project); } + /** Rex visitor used to track correlation depth within expressions and subqueries. */ private static class RexVisitor extends RexShuttle { final OuterReferenceResolver referenceResolver; + /** + * Creates a new Rex visitor bound to the given reference resolver. 
+ * + * @param referenceResolver the parent resolver maintaining depth maps + */ RexVisitor(OuterReferenceResolver referenceResolver) { this.referenceResolver = referenceResolver; } + /** + * Increments correlation depth when entering a subquery and decrements when exiting. + * + * @param subQuery the subquery expression + * @return the same subquery + */ @Override public RexNode visitSubQuery(RexSubQuery subQuery) { referenceResolver.nestedDepth.replaceAll((k, v) -> v + 1); @@ -108,6 +184,12 @@ public RexNode visitSubQuery(RexSubQuery subQuery) { return subQuery; } + /** + * Records depth for {@link RexFieldAccess} referencing a {@link RexCorrelVariable}. + * + * @param fieldAccess the field access expression + * @return the same field access + */ @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) { if (fieldAccess.getReferenceExpr() instanceof RexCorrelVariable) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/PreCalciteAggregateValidator.java b/isthmus/src/main/java/io/substrait/isthmus/PreCalciteAggregateValidator.java index f2419ab01..80d17d1c0 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/PreCalciteAggregateValidator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/PreCalciteAggregateValidator.java @@ -12,23 +12,23 @@ import java.util.stream.Collectors; /** - * Not all Substrait {@link Aggregate} rels are convertable to {@link - * org.apache.calcite.rel.core.Aggregate} rels + * Validates and rewrites Substrait {@link Aggregate} relations for compatibility with Calcite + * {@link org.apache.calcite.rel.core.Aggregate}. * - *

The code in this class can: + *

Responsibilities: * *

    - *
  • Check for these cases - *
  • Rewrite the Substrait {@link Aggregate} such that it can be converted to Calcite + *
  • Check if an {@link Aggregate} can be converted directly to Calcite + *
  • Rewrite invalid aggregates into a form acceptable by Calcite *
*/ public class PreCalciteAggregateValidator { /** - * Checks that the given {@link Aggregate} is valid for use in Calcite + * Checks whether the given {@link Aggregate} is valid for Calcite conversion. * - * @param aggregate - * @return + * @param aggregate the Substrait aggregate relation + * @return {@code true} if valid for Calcite, {@code false} otherwise */ public static boolean isValidCalciteAggregate(Aggregate aggregate) { return aggregate.getMeasures().stream() @@ -38,12 +38,11 @@ public static boolean isValidCalciteAggregate(Aggregate aggregate) { } /** - * Checks that all expressions present in the given {@link Aggregate.Measure} are {@link - * FieldReference}s, as Calcite expects all expressions in {@link - * org.apache.calcite.rel.core.Aggregate}s to be field references. + * Checks if an {@link Aggregate.Measure} uses only {@link FieldReference}s for arguments, sort + * fields, and pre-measure filter. * - * @return true if the {@code measure} can be converted to a Calcite equivalent without changes, - * false otherwise. + * @param measure the aggregate measure to validate + * @return {@code true} if valid, {@code false} otherwise */ private static boolean isValidCalciteMeasure(Aggregate.Measure measure) { return @@ -58,32 +57,19 @@ private static boolean isValidCalciteMeasure(Aggregate.Measure measure) { } /** - * Checks that all expressions present in the given {@link Aggregate.Grouping} are {@link - * FieldReference}s, as Calcite expects all expressions in {@link - * org.apache.calcite.rel.core.Aggregate}s to be field references. + * Checks if an {@link Aggregate.Grouping} uses only {@link FieldReference}s and ensures grouping + * fields are in ascending order. * - *

Additionally, checks that all grouping fields are specified in ascending order. - * - * @return true if the {@code grouping} can be converted to a Calcite equivalent without changes, - * false otherwise. + * @param grouping the aggregate grouping to validate + * @return {@code true} if valid, {@code false} otherwise */ private static boolean isValidCalciteGrouping(Aggregate.Grouping grouping) { if (!grouping.getExpressions().stream().allMatch(e -> isSimpleFieldReference(e))) { - // all grouping expressions must be field references return false; } - // Calcite stores grouping fields in an ImmutableBitSet and does not track the order of the - // grouping fields. The output record shape that Calcite generates ALWAYS has the groupings in - // ascending field order. This causes issues with Substrait in cases where the grouping fields - // in Substrait are not defined in ascending order. - - // For example, if a grouping is defined as (0, 2, 1) in Substrait, Calcite will output it as - // (0, 1, 2), which means that the Calcite output will no longer line up with the expectations - // of the Substrait plan. List groupingFields = grouping.getExpressions().stream() - // isSimpleFieldReference above guarantees that the expr is a FieldReference .map(expr -> getFieldRefOffset((FieldReference) expr)) .collect(Collectors.toList()); @@ -112,6 +98,10 @@ private static boolean isOrdered(List list) { return true; } + /** + * Transforms invalid aggregates into Calcite-compatible form by projecting non-field expressions + * and reordering groupings. 
+ */ public static class PreCalciteAggregateTransformer { // New expressions to include in the project before the aggregate @@ -122,18 +112,19 @@ public static class PreCalciteAggregateTransformer { private PreCalciteAggregateTransformer(Aggregate aggregate) { this.newExpressions = new ArrayList<>(); - // The Substrait project output includes all input fields, followed by expressions this.expressionOffset = aggregate.getInput().getRecordType().fields().size(); } /** - * Transforms an {@link Aggregate} that cannot be handled by Calcite into an equivalent that can - * be handled by: + * Rewrites an {@link Aggregate} so that it can be converted to Calcite by: * *

    - *
  • Moving all non-field references into a project before the aggregation - *
  • Adding all groupings to this project so that they are referenced in "order" + *
  • Projecting non-field references before aggregation + *
  • Ensuring groupings are in ascending order *
+ * + * @param aggregate the original Substrait aggregate + * @return a transformed Calcite-compatible aggregate */ public static Aggregate transformToValidCalciteAggregate(Aggregate aggregate) { PreCalciteAggregateTransformer at = new PreCalciteAggregateTransformer(aggregate); @@ -189,8 +180,6 @@ private Aggregate.Measure updateMeasure(Aggregate.Measure measure) { } private Aggregate.Grouping updateGrouping(Aggregate.Grouping grouping) { - // project out all groupings unconditionally, even field references - // this ensures that out of order groupings are re-projected into in order groupings List newGroupingExpressions = grouping.getExpressions().stream().map(this::projectOut).collect(Collectors.toList()); return Aggregate.Grouping.builder().expressions(newGroupingExpressions).build(); @@ -212,14 +201,15 @@ private Expression projectOutNonFieldReference(Expression expr) { } /** - * Adds a new expression to the project at {@link - * PreCalciteAggregateTransformer#expressionOffset} and returns a field reference to the new - * expression + * Adds a new expression to the pre-aggregate project and returns a field reference pointing to + * it. 
+ * + * @param expr the expression to project out + * @return a {@link FieldReference} to the projected expression */ private Expression projectOut(Expression expr) { newExpressions.add(expr); return FieldReference.builder() - // create a field reference to the new expression, then update the expression offset .addSegments(FieldReference.StructField.of(expressionOffset++)) .type(expr.getType()) .build(); diff --git a/isthmus/src/main/java/io/substrait/isthmus/RelNodeVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/RelNodeVisitor.java index 81c4e9a49..9a45ee40c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/RelNodeVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/RelNodeVisitor.java @@ -18,80 +18,215 @@ import org.apache.calcite.rel.core.Union; import org.apache.calcite.rel.core.Values; -/** A more generic version of RelShuttle that allows an alternative return value. */ +/** + * A generic visitor for {@link RelNode} trees that supports custom return types and checked + * exceptions. + * + *

It provides type-safe methods for common Calcite relational operators and a fallback for + * unhandled types. It is useful when implementing transformations or analysis logic over relational + * expressions without extending Calcite's built-in visitor classes. + * + * @param <OUTPUT> the return type of visitor methods + * @param <EXCEPTION> the checked exception type that may be thrown during visiting + */ public abstract class RelNodeVisitor { - + /** + * Visits a {@link TableScan} node. + * + * @param scan the table scan node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(TableScan scan) throws EXCEPTION { return visitOther(scan); } + /** + * Visits a {@link TableFunctionScan} node. + * + * @param scan the table function scan node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(TableFunctionScan scan) throws EXCEPTION { return visitOther(scan); } + /** + * Visits a {@link Values} node. + * + * @param values the values node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Values values) throws EXCEPTION { return visitOther(values); } + /** + * Visits a {@link Filter} node. + * + * @param filter the filter node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Filter filter) throws EXCEPTION { return visitOther(filter); } + /** + * Visits a {@link Calc} node. + * + * @param calc the calc node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Calc calc) throws EXCEPTION { return visitOther(calc); } + /** + * Visits a {@link Project} node.
+ * + * @param project the project node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Project project) throws EXCEPTION { return visitOther(project); } + /** + * Visits a {@link Join} node. + * + * @param join the join node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Join join) throws EXCEPTION { return visitOther(join); } + /** + * Visits a {@link Correlate} node. + * + * @param correlate the correlate node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Correlate correlate) throws EXCEPTION { return visitOther(correlate); } + /** + * Visits a {@link Union} node. + * + * @param union the union node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Union union) throws EXCEPTION { return visitOther(union); } + /** + * Visits an {@link Intersect} node. + * + * @param intersect the intersect node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Intersect intersect) throws EXCEPTION { return visitOther(intersect); } + /** + * Visits a {@link Minus} node. + * + * @param minus the minus node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Minus minus) throws EXCEPTION { return visitOther(minus); } + /** + * Visits an {@link Aggregate} node. + * + * @param aggregate the aggregate node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Aggregate aggregate) throws EXCEPTION { return visitOther(aggregate); } + /** + * Visits a {@link Match} node. 
+ * + * @param match the match node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Match match) throws EXCEPTION { return visitOther(match); } + /** + * Visits a {@link Sort} node. + * + * @param sort the sort node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Sort sort) throws EXCEPTION { return visitOther(sort); } + /** + * Visits an {@link Exchange} node. + * + * @param exchange the exchange node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(Exchange exchange) throws EXCEPTION { return visitOther(exchange); } + /** + * Visits a {@link TableModify} node. + * + * @param modify the table modify node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public OUTPUT visit(TableModify modify) throws EXCEPTION { return visitOther(modify); } + /** + * Fallback method for visiting any {@link RelNode} type not explicitly handled. + * + * @param other the relational node + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing + */ public abstract OUTPUT visitOther(RelNode other) throws EXCEPTION; + /** Protected constructor to enforce subclassing. */ protected RelNodeVisitor() {} /** - * The method you call when you would normally call RelNode.accept(visitor). Instead call - * RelVisitor.reverseAccept(RelNode) due to the lack of ability to extend base classes. + * Dispatches to the appropriate visit method based on the runtime type of the {@link RelNode}. + * + *

Use this instead of {@code RelNode.accept(visitor)} because {@link RelNodeVisitor} cannot + * extend Calcite's base visitor classes. + * + * @param node the relational node to visit + * @return the result of visiting this node + * @throws EXCEPTION if an error occurs during processing */ public final OUTPUT reverseAccept(RelNode node) throws EXCEPTION { if (node instanceof TableScan) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/SchemaCollector.java b/isthmus/src/main/java/io/substrait/isthmus/SchemaCollector.java index 99eaac1ab..20fb7c840 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SchemaCollector.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SchemaCollector.java @@ -25,6 +25,12 @@ public class SchemaCollector { private final RelDataTypeFactory typeFactory; private final TypeConverter typeConverter; + /** + * Creates a new {@code SchemaCollector} with the specified type factory and type converter. + * + * @param typeFactory Calcite {@link RelDataTypeFactory} used for creating relational types + * @param typeConverter converter for mapping between Calcite and Substrait types + */ public SchemaCollector(RelDataTypeFactory typeFactory, TypeConverter typeConverter) { this.typeFactory = typeFactory; this.typeConverter = typeConverter; diff --git a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java index 3c61acd94..fa622c7f1 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SimpleExtensionToSqlOperator.java @@ -23,6 +23,21 @@ import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; +/** + * Utility class for converting Substrait {@link SimpleExtension} function definitions (scalar and + * aggregate) into Calcite {@link SqlOperator}s. + * + *

This enables Calcite to recognize and use Substrait-defined functions during query planning + * and execution. Conversion includes: + * + *

    + *
  • Mapping Substrait types to Calcite {@link SqlTypeName} + *
  • Building {@link SqlFunction} instances with proper argument families + *
  • Inferring return types based on Substrait type expressions and nullability rules + *
+ * + *

Currently supports scalar and aggregate functions; window functions are not yet implemented. + */ public final class SimpleExtensionToSqlOperator { private static final RelDataTypeFactory DEFAULT_TYPE_FACTORY = @@ -32,15 +47,40 @@ public final class SimpleExtensionToSqlOperator { private SimpleExtensionToSqlOperator() {} + /** + * Converts all functions in a Substrait {@link SimpleExtension.ExtensionCollection} (scalar and + * aggregate) into Calcite {@link SqlOperator}s using the default type factory. + * + * @param collection The Substrait extension collection containing function definitions. + * @return A list of Calcite {@link SqlOperator}s corresponding to the Substrait functions. + */ public static List from(SimpleExtension.ExtensionCollection collection) { return from(collection, DEFAULT_TYPE_FACTORY); } + /** + * Converts all functions in a Substrait {@link SimpleExtension.ExtensionCollection} (scalar and + * aggregate) into Calcite {@link SqlOperator}s using a provided type factory. + * + * @param collection The Substrait extension collection containing function definitions. + * @param typeFactory Calcite {@link RelDataTypeFactory} for type creation and inference. + * @return A list of Calcite {@link SqlOperator}s corresponding to the Substrait functions. + */ public static List from( SimpleExtension.ExtensionCollection collection, RelDataTypeFactory typeFactory) { return from(collection, typeFactory, TypeConverter.DEFAULT); } + /** + * Converts all functions in a Substrait {@link SimpleExtension.ExtensionCollection} (scalar and + * aggregate) into Calcite {@link SqlOperator}s with a custom type factory and {@link + * TypeConverter}. + * + * @param collection The Substrait extension collection containing function definitions. + * @param typeFactory Calcite {@link RelDataTypeFactory} for type creation and inference. + * @param typeConverter Converter for Substrait/Calcite type mappings. 
+ * @return A list of Calcite {@link SqlOperator}s corresponding to the Substrait functions. + */ public static List from( SimpleExtension.ExtensionCollection collection, RelDataTypeFactory typeFactory, diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java index f667deab0..27b9705b3 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java @@ -18,23 +18,49 @@ import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.calcite.sql2rel.SqlToRelConverter; +/** + * Base class for Substrait SQL conversion pipelines. + * + *

Configures Calcite parser, connection, planner, and cluster. Holds the Substrait extensions + * and feature flags. Subclasses can build conversions from SQL to Calcite/Substrait using this + * shared setup. + */ public class SqlConverterBase { + /** Substrait extension collection used for function/operator mappings. */ protected final SimpleExtension.ExtensionCollection extensionCollection; + /** Default Calcite connection config (case-insensitive). */ public static final CalciteConnectionConfig CONNECTION_CONFIG = CalciteConnectionConfig.DEFAULT.set( CalciteConnectionProperty.CASE_SENSITIVE, Boolean.FALSE.toString()); + /** Calcite type factory using the Substrait type system. */ final RelDataTypeFactory factory; + + /** Calcite optimization cluster with planner, type factory, and RexBuilder. */ final RelOptCluster relOptCluster; + + /** Connection configuration used for SQL parsing and validation. */ final CalciteConnectionConfig config; + + /** Configuration for SQL-to-Rel conversion. */ final SqlToRelConverter.Config converterConfig; + /** Parser configuration, including casing and DDL parser factory. */ final SqlParser.Config parserConfig; + /** Default feature board if none is provided. */ protected static final FeatureBoard FEATURES_DEFAULT = ImmutableFeatureBoard.builder().build(); + + /** Feature flags controlling conversion behavior. */ final FeatureBoard featureBoard; + /** + * Creates a converter base with explicit features and extensions. + * + * @param features Feature flags controlling behavior; if {@code null}, defaults are used. + * @param extensionCollection Substrait extension collection for mapping functions/operators. 
+ */ protected SqlConverterBase( FeatureBoard features, SimpleExtension.ExtensionCollection extensionCollection) { this.factory = SubstraitTypeSystem.TYPE_FACTORY; @@ -59,6 +85,11 @@ protected SqlConverterBase( this.extensionCollection = extensionCollection; } + /** + * Creates a converter base with explicit features and the default Substrait extension catalog. + * + * @param features Feature flags controlling behavior; if {@code null}, defaults are used. + */ protected SqlConverterBase(FeatureBoard features) { this(features, DefaultExtensionCatalog.DEFAULT_COLLECTION); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java index 3d45f8bde..1863649fc 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java @@ -30,14 +30,28 @@ import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.sql2rel.StandardConvertletTable; +/** + * Converts SQL expressions to Substrait {@link io.substrait.proto.ExtendedExpression} payloads. + * + *

Supports optional CREATE TABLE statements to provide schema and column bindings for expression + * validation and Rex conversion. + */ public class SqlExpressionToSubstrait extends SqlConverterBase { + /** Converter for RexNodes to Substrait expressions. */ protected final RexExpressionConverter rexConverter; + /** Creates a converter with default features and the default extension catalog. */ public SqlExpressionToSubstrait() { this(FEATURES_DEFAULT, DefaultExtensionCatalog.DEFAULT_COLLECTION); } + /** + * Creates a converter with the given feature board and extension collection. + * + * @param features feature flags for conversion + * @param extensions extension functions used during expression conversion + */ public SqlExpressionToSubstrait( FeatureBoard features, SimpleExtension.ExtensionCollection extensions) { super(features, extensions); @@ -46,12 +60,21 @@ public SqlExpressionToSubstrait( this.rexConverter = new RexExpressionConverter(scalarFunctionConverter); } + /** Bundled result carrying validator, catalog reader, and name/type and name/node maps. */ private static final class Result { final SqlValidator validator; final CalciteCatalogReader catalogReader; final Map nameToTypeMap; final Map nameToNodeMap; + /** + * Creates a result bundle. + * + * @param validator SQL validator + * @param catalogReader Calcite catalog reader + * @param nameToTypeMap mapping from column name to Calcite type + * @param nameToNodeMap mapping from column name to Rex input ref + */ Result( SqlValidator validator, CalciteCatalogReader catalogReader, @@ -65,12 +88,12 @@ private static final class Result { } /** - * Converts the given SQL expression to an {@link io.substrait.proto.ExtendedExpression } + * Converts a single SQL expression to a Substrait {@link io.substrait.proto.ExtendedExpression}. 
* * @param sqlExpression a SQL expression * @param createStatements table creation statements defining fields referenced by the expression - * @return a {@link io.substrait.proto.ExtendedExpression } - * @throws SqlParseException + * @return the Substrait extended expression proto + * @throws SqlParseException if parsing or validation fails */ public io.substrait.proto.ExtendedExpression convert( String sqlExpression, List createStatements) throws SqlParseException { @@ -78,12 +101,12 @@ public io.substrait.proto.ExtendedExpression convert( } /** - * Converts the given SQL expressions to an {@link io.substrait.proto.ExtendedExpression } + * Converts multiple SQL expressions to a Substrait {@link io.substrait.proto.ExtendedExpression}. * - * @param sqlExpressions an array of SQL expressions - * @param createStatements table creation statements defining fields referenced by the expression - * @return a {@link io.substrait.proto.ExtendedExpression } - * @throws SqlParseException + * @param sqlExpressions array of SQL expressions + * @param createStatements table creation statements defining fields referenced by the expressions + * @return the Substrait extended expression proto + * @throws SqlParseException if parsing or validation fails */ public io.substrait.proto.ExtendedExpression convert( String[] sqlExpressions, List createStatements) throws SqlParseException { @@ -96,6 +119,17 @@ public io.substrait.proto.ExtendedExpression convert( result.nameToNodeMap); } + /** + * Converts the given SQL expressions using the provided validator/catalog and column bindings. 
+ * + * @param sqlExpressions array of SQL expressions + * @param validator SQL validator + * @param catalogReader Calcite catalog reader + * @param nameToTypeMap mapping from column name to Calcite type + * @param nameToNodeMap mapping from column name to Rex input ref + * @return the Substrait extended expression proto + * @throws SqlParseException if parsing or validation fails + */ private io.substrait.proto.ExtendedExpression executeInnerSQLExpressions( String[] sqlExpressions, SqlValidator validator, @@ -126,6 +160,17 @@ private io.substrait.proto.ExtendedExpression executeInnerSQLExpressions( return new ExtendedExpressionProtoConverter().toProto(extendedExpression.build()); } + /** + * Parses and validates a SQL expression, then converts it to a {@link RexNode}. + * + * @param sql SQL expression string + * @param validator SQL validator + * @param catalogReader Calcite catalog reader + * @param nameToTypeMap mapping from column name to Calcite type + * @param nameToNodeMap mapping from column name to Rex input ref + * @return the converted RexNode + * @throws SqlParseException if parsing or validation fails + */ private RexNode sqlToRexNode( String sql, SqlValidator validator, @@ -147,6 +192,13 @@ private RexNode sqlToRexNode( return converter.convertExpression(validSqlNode, nameToNodeMap); } + /** + * Registers tables from CREATE statements and prepares validator, catalog, and column bindings. 
+ * + * @param tables list of CREATE TABLE statements; may be null + * @return result bundle containing validator, catalog reader, and name/type and name/node maps + * @throws SqlParseException if any CREATE statement is invalid + */ private Result registerCreateTablesForExtendedExpression(List tables) throws SqlParseException { Map nameToTypeMap = new LinkedHashMap<>(); @@ -183,6 +235,12 @@ private Result registerCreateTablesForExtendedExpression(List tables) return new Result(validator, catalogReader, nameToTypeMap, nameToNodeMap); } + /** + * Converts a name-to-type map into a {@link NamedStruct} in Substrait types. + * + * @param nameToTypeMap mapping from column name to Calcite type + * @return a {@link NamedStruct} with non-nullable struct type + */ private NamedStruct toNamedStruct(Map nameToTypeMap) { ArrayList names = new ArrayList(); ArrayList types = new ArrayList(); diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index e60494244..a34d5bb98 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -21,14 +21,31 @@ public class SqlToSubstrait extends SqlConverterBase { private final SqlOperatorTable operatorTable; + /** + * Creates a SQL-to-Substrait converter using the default extension catalog and no feature + * overrides. + */ public SqlToSubstrait() { this(DefaultExtensionCatalog.DEFAULT_COLLECTION, null); } + /** + * Creates a SQL-to-Substrait converter using the default extension catalog and provided features. + * + * @param features Feature flags controlling conversion behavior; may be {@code null} for + * defaults. + */ public SqlToSubstrait(FeatureBoard features) { this(DefaultExtensionCatalog.DEFAULT_COLLECTION, features); } + /** + * Creates a SQL-to-Substrait converter with explicit extensions and features. 
+ * + * @param extensions Substrait extension collection for function/operator mappings. + * @param features Feature flags controlling conversion behavior; may be {@code null} for + * defaults. + */ public SqlToSubstrait(SimpleExtension.ExtensionCollection extensions, FeatureBoard features) { super(features, extensions); diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index 47daf97e2..679dfc42f 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -95,16 +95,35 @@ public class SubstraitRelNodeConverter extends AbstractRelVisitor { + /** Calcite type factory used to construct row and field types. */ protected final RelDataTypeFactory typeFactory; + /** Converter for Substrait scalar functions to Calcite operators. */ protected final ScalarFunctionConverter scalarFunctionConverter; + + /** Converter for Substrait aggregate functions to Calcite operators. */ protected final AggregateFunctionConverter aggregateFunctionConverter; + + /** Converts Substrait {@code Expression}s into Calcite {@code RexNode}s. */ protected final ExpressionRexConverter expressionRexConverter; + /** Calcite {@link RelBuilder} used to construct relational expressions during conversion. */ protected final RelBuilder relBuilder; + + /** Calcite {@link RexBuilder} used to build Rex nodes (e.g., input refs, literals). */ protected final RexBuilder rexBuilder; + + /** Type converter to translate between Calcite and Substrait type systems. */ private final TypeConverter typeConverter; + /** + * Constructs a converter using the given extensions, type factory, and builder with default + * features. 
+ * + * @param extensions Substrait extension collection + * @param typeFactory Calcite type factory + * @param relBuilder Calcite {@link RelBuilder} used to build relational expressions + */ public SubstraitRelNodeConverter( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, @@ -112,6 +131,14 @@ public SubstraitRelNodeConverter( this(extensions, typeFactory, relBuilder, ImmutableFeatureBoard.builder().build()); } + /** + * Constructs a converter using the given extensions and feature board. + * + * @param extensions Substrait extension collection (scalar/aggregate/window function variants) + * @param typeFactory Calcite type factory + * @param relBuilder Calcite {@link RelBuilder} + * @param featureBoard feature flags controlling behavior (e.g., dynamic UDFs) + */ public SubstraitRelNodeConverter( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, @@ -126,6 +153,16 @@ public SubstraitRelNodeConverter( TypeConverter.DEFAULT); } + /** + * Constructs a converter with explicit function converters and type converter. + * + * @param typeFactory Calcite type factory + * @param relBuilder Calcite {@link RelBuilder} + * @param scalarFunctionConverter converter for scalar functions + * @param aggregateFunctionConverter converter for aggregate functions + * @param windowFunctionConverter converter for window functions + * @param typeConverter Calcite↔Substrait type converter + */ public SubstraitRelNodeConverter( RelDataTypeFactory typeFactory, RelBuilder relBuilder, @@ -144,6 +181,17 @@ public SubstraitRelNodeConverter( typeFactory, scalarFunctionConverter, windowFunctionConverter, typeConverter)); } + /** + * Constructs a converter with an explicit {@link ExpressionRexConverter}. 
+ * + * @param typeFactory Calcite type factory + * @param relBuilder Calcite {@link RelBuilder} + * @param scalarFunctionConverter converter for scalar functions + * @param aggregateFunctionConverter converter for aggregate functions + * @param windowFunctionConverter converter for window functions + * @param typeConverter Calcite↔Substrait type converter + * @param expressionRexConverter converter for Substrait expressions to Calcite Rex nodes + */ public SubstraitRelNodeConverter( RelDataTypeFactory typeFactory, RelBuilder relBuilder, @@ -202,6 +250,21 @@ private static ScalarFunctionConverter createScalarFunctionConverter( extensions.scalarFunctions(), additionalSignatures, typeFactory, TypeConverter.DEFAULT); } + /** + * Converts a Substrait {@link Rel} plan to a Calcite {@link RelNode} using default feature + * settings. + * + *

This method creates a {@link RelBuilder} configured with the provided cluster and catalog, + * then delegates to {@link #convert(Rel, RelOptCluster, Prepare.CatalogReader, SqlParser.Config, + * SimpleExtension.ExtensionCollection, FeatureBoard)} with default features. + * + * @param relRoot the root Substrait relation to convert + * @param relOptCluster the Calcite cluster providing optimization context + * @param catalogReader the Calcite catalog reader for schema resolution + * @param parserConfig the SQL parser configuration + * @param extensions the Substrait extension collection (scalar, aggregate, window functions) + * @return the converted Calcite {@link RelNode} + */ public static RelNode convert( Rel relRoot, RelOptCluster relOptCluster, @@ -217,6 +280,21 @@ public static RelNode convert( ImmutableFeatureBoard.builder().build()); } + /** + * Converts a Substrait {@link Rel} plan to a Calcite {@link RelNode} using the specified feature + * board. + * + *

This method initializes a {@link RelBuilder} with the given cluster and catalog, then + * constructs a {@link SubstraitRelNodeConverter} to perform the conversion. + * + * @param relRoot the root Substrait relation to convert + * @param relOptCluster the Calcite cluster providing optimization context + * @param catalogReader the Calcite catalog reader for schema resolution + * @param parserConfig the SQL parser configuration + * @param extensions the Substrait extension collection (scalar, aggregate, window functions) + * @param featureBoard feature flags controlling conversion behavior (e.g., dynamic UDF support) + * @return the converted Calcite {@link RelNode} + */ public static RelNode convert( Rel relRoot, RelOptCluster relOptCluster, @@ -850,6 +928,16 @@ public RelNode visitFallback(Rel rel, Context context) throws RuntimeException { rel, rel.getClass().getCanonicalName(), this.getClass().getCanonicalName())); } + /** + * Applies an optional field remap to the given node. + * + *

If {@code remap} is present, the node is projected according to the provided indices; + * otherwise the original node is returned unchanged. + * + * @param relNode the node to remap + * @param remap optional field index remap + * @return remapped node or original node if no remap is present + */ protected RelNode applyRemap(RelNode relNode, Optional remap) { if (remap.isPresent()) { return applyRemap(relNode, remap.get()); @@ -873,8 +961,10 @@ private RelNode applyRemap(RelNode relNode, Rel.Remap remap) { /** A shared context for the Substrait to RelNode conversion. */ public static class Context implements VisitationContext { + /** Stack of outer row type range maps used to resolve correlated references. */ protected final Stack> outerRowTypes = new Stack<>(); + /** Stack of correlation ids collected while visiting subqueries. */ protected final Stack> correlationIds = new Stack<>(); private int subqueryDepth; @@ -912,6 +1002,7 @@ public void pushOuterRowType(final RelDataType... inputs) { this.correlationIds.push(new HashSet<>()); } + /** Pops the most recent outer row type from the stack. */ public void popOuterRowType() { outerRowTypes.pop(); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java index 835d8493d..92a054648 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java @@ -68,6 +68,13 @@ import org.apache.calcite.util.ImmutableBitSet; import org.immutables.value.Value; +/** + * Visitor that converts Calcite {@link RelNode} trees to Substrait {@link Rel} relations and + * optionally wraps them in {@link Plan.Root}. + * + *

Supports scalar, aggregate, window functions, joins, projections, filters, sorting, set ops, + * and DDL/DML conversions. Behavior is controlled via {@link FeatureBoard}. + */ @SuppressWarnings("UnstableApiUsage") @Value.Enclosing public class SubstraitRelVisitor extends RelNodeVisitor { @@ -75,17 +82,38 @@ public class SubstraitRelVisitor extends RelNodeVisitor { private static final FeatureBoard FEATURES_DEFAULT = ImmutableFeatureBoard.builder().build(); private static final Expression.BoolLiteral TRUE = ExpressionCreator.bool(false, true); + /** Converter for Calcite {@link RexNode} to Substrait {@link Expression}. */ protected final RexExpressionConverter rexExpressionConverter; + + /** Converter for {@link AggregateCall} to Substrait aggregate invocation. */ protected final AggregateFunctionConverter aggregateFunctionConverter; + + /** Converter for Calcite {@link RelDataType} to Substrait {@link Type}. */ protected final TypeConverter typeConverter; + + /** Feature flags governing conversion. */ protected final FeatureBoard featureBoard; + private Map fieldAccessDepthMap; + /** + * Creates a visitor with default features. + * + * @param typeFactory Calcite type factory. + * @param extensions Substrait extension collection. + */ public SubstraitRelVisitor( RelDataTypeFactory typeFactory, SimpleExtension.ExtensionCollection extensions) { this(typeFactory, extensions, FEATURES_DEFAULT); } + /** + * Creates a visitor with explicit features. + * + * @param typeFactory Calcite type factory. + * @param extensions Substrait extension collection. + * @param features Feature flags. + */ public SubstraitRelVisitor( RelDataTypeFactory typeFactory, SimpleExtension.ExtensionCollection extensions, @@ -125,6 +153,16 @@ public SubstraitRelVisitor( this.featureBoard = features; } + /** + * Creates a visitor with custom converters and features. + * + * @param typeFactory Calcite type factory. + * @param scalarFunctionConverter Converter for scalar functions. 
+ * @param aggregateFunctionConverter Converter for aggregate functions. + * @param windowFunctionConverter Converter for window functions. + * @param typeConverter Converter for types. + * @param features Feature flags. + */ public SubstraitRelVisitor( RelDataTypeFactory typeFactory, ScalarFunctionConverter scalarFunctionConverter, @@ -143,10 +181,22 @@ public SubstraitRelVisitor( this.featureBoard = features; } + /** + * Converts a {@link RexNode} to a Substrait {@link Expression}. + * + * @param node Rex expression node. + * @return Substrait expression. + */ protected Expression toExpression(RexNode node) { return node.accept(rexExpressionConverter); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.TableScan}. + * + * @param scan Calcite table scan. + * @return Substrait named scan. + */ @Override public Rel visit(org.apache.calcite.rel.core.TableScan scan) { NamedStruct type = typeConverter.toNamedStruct(scan.getRowType()); @@ -156,11 +206,23 @@ public Rel visit(org.apache.calcite.rel.core.TableScan scan) { .build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.TableFunctionScan}. + * + * @param scan Calcite table function scan. + * @return Converted relation or {@code super.visit(scan)}. + */ @Override public Rel visit(org.apache.calcite.rel.core.TableFunctionScan scan) { return super.visit(scan); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Values}. + * + * @param values Calcite values relation. + * @return Substrait scan (empty or virtual table). + */ @Override public Rel visit(org.apache.calcite.rel.core.Values values) { NamedStruct type = typeConverter.toNamedStruct(values.getRowType()); @@ -182,17 +244,35 @@ public Rel visit(org.apache.calcite.rel.core.Values values) { return VirtualTableScan.builder().initialSchema(type).addAllRows(structs).build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Filter}. + * + * @param filter Calcite filter relation. 
+ * @return Substrait filter. + */ @Override public Rel visit(org.apache.calcite.rel.core.Filter filter) { Expression condition = toExpression(filter.getCondition()); return Filter.builder().condition(condition).input(apply(filter.getInput())).build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Calc}. + * + * @param calc Calcite calc relation. + * @return Converted relation or {@code super.visit(calc)}. + */ @Override public Rel visit(org.apache.calcite.rel.core.Calc calc) { return super.visit(calc); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Project}. + * + * @param project Calcite project relation. + * @return Substrait project. + */ @Override public Rel visit(org.apache.calcite.rel.core.Project project) { List expressions = @@ -214,6 +294,12 @@ public Rel visit(org.apache.calcite.rel.core.Project project) { .build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Join}. + * + * @param join Calcite join relation. + * @return Substrait join or cross. + */ @Override public Rel visit(org.apache.calcite.rel.core.Join join) { Rel left = apply(join.getLeft()); @@ -248,6 +334,12 @@ private Join.JoinType asJoinType(org.apache.calcite.rel.core.Join join) { throw new UnsupportedOperationException("Unsupported join type: " + join.getJoinType()); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Correlate}. + * + * @param correlate Calcite correlate relation. + * @return Converted relation or {@code super.visit(correlate)}. + */ @Override public Rel visit(org.apache.calcite.rel.core.Correlate correlate) { // left input of correlated-join is similar to the left input of a logical join @@ -259,6 +351,12 @@ public Rel visit(org.apache.calcite.rel.core.Correlate correlate) { return super.visit(correlate); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Union}. + * + * @param union Calcite union relation. + * @return Substrait set-union. 
+ */ @Override public Rel visit(org.apache.calcite.rel.core.Union union) { List inputs = apply(union.getInputs()); @@ -266,6 +364,12 @@ public Rel visit(org.apache.calcite.rel.core.Union union) { return Set.builder().inputs(inputs).setOp(setOp).build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Intersect}. + * + * @param intersect Calcite intersect relation. + * @return Substrait set-intersection. + */ @Override public Rel visit(org.apache.calcite.rel.core.Intersect intersect) { List inputs = apply(intersect.getInputs()); @@ -274,6 +378,12 @@ public Rel visit(org.apache.calcite.rel.core.Intersect intersect) { return Set.builder().inputs(inputs).setOp(setOp).build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Minus}. + * + * @param minus Calcite minus relation. + * @return Substrait set-minus. + */ @Override public Rel visit(org.apache.calcite.rel.core.Minus minus) { List inputs = apply(minus.getInputs()); @@ -281,6 +391,13 @@ public Rel visit(org.apache.calcite.rel.core.Minus minus) { return Set.builder().inputs(inputs).setOp(setOp).build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Aggregate}. + * + * @param aggregate Calcite aggregate relation. + * @return Substrait aggregate. + * @throws IllegalStateException if unexpected remap state is encountered. + */ @Override public Rel visit(org.apache.calcite.rel.core.Aggregate aggregate) { Rel input = apply(aggregate.getInput()); @@ -379,11 +496,23 @@ Aggregate.Measure fromAggCall(RelNode input, Type.Struct inputType, AggregateCal return builder.build(); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Match}. + * + * @param match Calcite match relation. + * @return Converted relation or {@code super.visit(match)}. + */ @Override public Rel visit(org.apache.calcite.rel.core.Match match) { return super.visit(match); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Sort}. + * + * @param sort Calcite sort relation. 
+ * @return Substrait sort/fetch chain. + */ @Override public Rel visit(org.apache.calcite.rel.core.Sort sort) { Rel input = apply(sort.getInput()); @@ -425,6 +554,13 @@ private long asLong(RexNode rex) { throw new UnsupportedOperationException("Unknown type: " + rex); } + /** + * Converts a Calcite sort collation to a Substrait {@link Expression.SortField}. + * + * @param collation Calcite field collation. + * @param inputType Input record type. + * @return Substrait sort field. + */ public static Expression.SortField toSortField( RelFieldCollation collation, Type.Struct inputType) { Expression.SortDirection direction = asSortDirection(collation); @@ -453,11 +589,24 @@ private static Expression.SortDirection asSortDirection(RelFieldCollation collat throw new IllegalArgumentException("Unsupported collation direction: " + direction); } + /** + * Converts a Calcite {@link org.apache.calcite.rel.core.Exchange}. + * + * @param exchange Calcite exchange relation. + * @return Converted relation or {@code super.visit(exchange)}. + */ @Override public Rel visit(org.apache.calcite.rel.core.Exchange exchange) { return super.visit(exchange); } + /** + * Converts a Calcite {@link TableModify} (INSERT/DELETE/UPDATE). + * + * @param modify Calcite table modify node. + * @return Substrait write/update relation. + * @throws IllegalStateException if an update column is not found in the table schema. + */ @Override public Rel visit(TableModify modify) { switch (modify.getOperation()) { @@ -566,6 +715,12 @@ private NamedStruct getSchema(final RelNode queryRelRoot) { return typeConverter.toNamedStruct(rowType); } + /** + * Handles Calcite {@link CreateTable} as Substrait CTAS. + * + * @param createTable Calcite create-table node. + * @return Substrait CTAS write relation. 
+ */ public Rel handleCreateTable(CreateTable createTable) { RelNode input = createTable.getInput(); Rel inputRel = apply(input); @@ -580,6 +735,12 @@ public Rel handleCreateTable(CreateTable createTable) { .build(); } + /** + * Handles Calcite {@link CreateView} as Substrait view DDL. + * + * @param createView Calcite create-view node. + * @return Substrait view DDL relation. + */ public Rel handleCreateView(CreateView createView) { RelNode input = createView.getInput(); Rel inputRel = apply(input); @@ -596,6 +757,13 @@ public Rel handleCreateView(CreateView createView) { .build(); } + /** + * Visits other Calcite nodes (e.g., DDL wrappers). + * + * @param other Calcite node. + * @return Converted relation. + * @throws UnsupportedOperationException if the node type is unsupported. + */ @Override public Rel visitOther(RelNode other) { if (other instanceof CreateTable) { @@ -607,20 +775,43 @@ public Rel visitOther(RelNode other) { throw new UnsupportedOperationException("Unable to handle node: " + other); } + /** + * Precomputes depth for outer field accesses used by correlated expressions. + * + * @param root Root Calcite node to analyze. + */ protected void popFieldAccessDepthMap(RelNode root) { final OuterReferenceResolver resolver = new OuterReferenceResolver(); resolver.apply(root); fieldAccessDepthMap = resolver.getFieldAccessDepthMap(); } + /** + * Returns the depth of a field access for correlated expressions. + * + * @param fieldAccess Rex field access. + * @return Depth value, or {@code null} if unknown. + */ public Integer getFieldAccessDepth(RexFieldAccess fieldAccess) { return fieldAccessDepthMap.get(fieldAccess); } + /** + * Applies the visitor to a Calcite {@link RelNode}. + * + * @param r Calcite node. + * @return Converted Substrait relation. + */ public Rel apply(RelNode r) { return reverseAccept(r); } + /** + * Applies the visitor to a list of Calcite {@link RelNode}s. + * + * @param inputs Calcite input relations. 
+ * @return Converted Substrait relations. + */ public List apply(List inputs) { return inputs.stream() .map(inputRel -> apply(inputRel)) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java index 772a3e192..a04781fc3 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java @@ -34,17 +34,39 @@ */ public class SubstraitToCalcite { + /** Substrait extension collection used for function/operator mappings. */ protected final SimpleExtension.ExtensionCollection extensions; + + /** Calcite type factory for creating and managing relational types. */ protected final RelDataTypeFactory typeFactory; + + /** Converter for translating Substrait types to Calcite types. */ protected final TypeConverter typeConverter; + + /** Catalog reader for schema resolution during conversion. */ protected final Prepare.CatalogReader catalogReader; + + /** Feature flags controlling Substrait-to-Calcite conversion behavior. */ protected final FeatureBoard featureBoard; + /** + * Creates a Substrait-to-Calcite converter with default type converter and no catalog reader. + * + * @param extensions Substrait extension collection. + * @param typeFactory Calcite type factory. + */ public SubstraitToCalcite( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory) { this(extensions, typeFactory, TypeConverter.DEFAULT, null); } + /** + * Creates a Substrait-to-Calcite converter with default type converter and a catalog reader. + * + * @param extensions Substrait extension collection. + * @param typeFactory Calcite type factory. + * @param catalogReader Calcite catalog reader for schema resolution. 
+ */ public SubstraitToCalcite( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, @@ -52,6 +74,13 @@ public SubstraitToCalcite( this(extensions, typeFactory, TypeConverter.DEFAULT, catalogReader); } + /** + * Creates a Substrait-to-Calcite converter with a custom type converter and no catalog reader. + * + * @param extensions Substrait extension collection. + * @param typeFactory Calcite type factory. + * @param typeConverter Converter for Substrait types to Calcite types. + */ public SubstraitToCalcite( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, @@ -59,6 +88,14 @@ public SubstraitToCalcite( this(extensions, typeFactory, typeConverter, null); } + /** + * Creates a Substrait-to-Calcite converter with a custom type converter and catalog reader. + * + * @param extensions Substrait extension collection. + * @param typeFactory Calcite type factory. + * @param typeConverter Converter for Substrait types to Calcite types. + * @param catalogReader Calcite catalog reader for schema resolution. + */ public SubstraitToCalcite( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, @@ -72,6 +109,15 @@ public SubstraitToCalcite( ImmutableFeatureBoard.builder().build()); } + /** + * Creates a Substrait-to-Calcite converter with full configuration. + * + * @param extensions Substrait extension collection. + * @param typeFactory Calcite type factory. + * @param typeConverter Converter for Substrait types to Calcite types. + * @param catalogReader Calcite catalog reader for schema resolution. + * @param featureBoard Feature flags controlling conversion behavior. + */ public SubstraitToCalcite( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory, @@ -89,6 +135,9 @@ public SubstraitToCalcite( * Extracts a {@link CalciteSchema} from a {@link Rel} * *

<p>Override this method to customize schema extraction. + * + * @param rel The Substrait {@link Rel} root to analyze. + * @return The extracted {@link CalciteSchema}. */ protected CalciteSchema toSchema(Rel rel) { SchemaCollector schemaCollector = new SchemaCollector(typeFactory, typeConverter); @@ -99,6 +148,9 @@ protected CalciteSchema toSchema(Rel rel) { * Creates a {@link RelBuilder} from the extracted {@link CalciteSchema} * *

<p>Override this method to customize the {@link RelBuilder}. + * + * @param schema The extracted {@link CalciteSchema} used as the default schema. + * @return A configured {@link RelBuilder}. */ protected RelBuilder createRelBuilder(CalciteSchema schema) { return RelBuilder.create(Frameworks.newConfigBuilder().defaultSchema(schema.plus()).build()); @@ -108,6 +160,9 @@ protected RelBuilder createRelBuilder(CalciteSchema schema) { * Creates a {@link SubstraitRelNodeConverter} from the {@link RelBuilder} * *

<p>Override this method to customize the {@link SubstraitRelNodeConverter}. + * + * @param relBuilder The {@link RelBuilder} used to build Calcite relational nodes. + * @return A configured {@link SubstraitRelNodeConverter}. */ protected SubstraitRelNodeConverter createSubstraitRelNodeConverter(RelBuilder relBuilder) { return new SubstraitRelNodeConverter(extensions, typeFactory, relBuilder, featureBoard); diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java index e327ab007..90a7a95ef 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java @@ -5,16 +5,36 @@ import org.apache.calcite.prepare.Prepare; import org.apache.calcite.rel.RelNode; +/** + * Converts Substrait {@link Rel} plans to Calcite {@link RelNode} and then to SQL. + * + *

Uses {@link SqlConverterBase} as the base for SQL conversion and supports optional extensions. + */ public class SubstraitToSql extends SqlConverterBase { + /** Creates a Substrait-to-SQL converter with default features. */ public SubstraitToSql() { super(FEATURES_DEFAULT); } + /** + * Creates a Substrait-to-SQL converter with default features and custom extensions. + * + * @param extensions Substrait extension collection for function/operator mappings. + */ public SubstraitToSql(SimpleExtension.ExtensionCollection extensions) { super(FEATURES_DEFAULT, extensions); } + /** + * Converts a Substrait {@link Rel} to a Calcite {@link RelNode}. + * + *

This is the first step before generating SQL from Substrait plans. + * + * @param relRoot The Substrait relational root to convert. + * @param catalog The Calcite catalog reader for schema resolution. + * @return A Calcite {@link RelNode} representing the converted Substrait plan. + */ public RelNode substraitRelToCalciteRel(Rel relRoot, Prepare.CatalogReader catalog) { return SubstraitRelNodeConverter.convert( relRoot, relOptCluster, catalog, parserConfig, extensionCollection); diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitTypeSystem.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitTypeSystem.java index 97498e457..514a36d3c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitTypeSystem.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitTypeSystem.java @@ -9,21 +9,37 @@ import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; +/** + * Custom {@link RelDataTypeSystem} implementation for Substrait. + * + *

Defines type system rules such as precision, scale, and interval qualifiers for Substrait + * integration with Calcite. + */ public class SubstraitTypeSystem extends RelDataTypeSystemImpl { + + /** Singleton instance of Substrait type system. */ public static final RelDataTypeSystem TYPE_SYSTEM = new SubstraitTypeSystem(); + /** Default type factory using the Substrait type system. */ public static final RelDataTypeFactory TYPE_FACTORY = new JavaTypeFactoryImpl(TYPE_SYSTEM); - // Interval qualifier from year to month + /** Interval qualifier from year to month. */ public static final SqlIntervalQualifier YEAR_MONTH_INTERVAL = new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, SqlParserPos.ZERO); - // Interval qualifier from day to fractional second at microsecond precision + /** Interval qualifier from day to fractional second at microsecond precision. */ public static final SqlIntervalQualifier DAY_SECOND_INTERVAL = new SqlIntervalQualifier(TimeUnit.DAY, -1, TimeUnit.SECOND, 6, SqlParserPos.ZERO); + /** Private constructor to enforce singleton usage. */ private SubstraitTypeSystem() {} + /** + * Returns the maximum precision for the given SQL type. + * + * @param typeName The {@link SqlTypeName} for which precision is requested. + * @return Maximum precision for the type. + */ @Override public int getMaxPrecision(final SqlTypeName typeName) { switch (typeName) { @@ -39,16 +55,31 @@ public int getMaxPrecision(final SqlTypeName typeName) { return super.getMaxPrecision(typeName); } + /** + * Returns the maximum numeric scale supported by this type system. + * + * @return Maximum numeric scale (38). + */ @Override public int getMaxNumericScale() { return 38; } + /** + * Returns the maximum numeric precision supported by this type system. + * + * @return Maximum numeric precision (38). + */ @Override public int getMaxNumericPrecision() { return 38; } + /** + * Indicates whether ragged union types should be converted to varying types. 
+ * + * @return {@code true}, as Substrait requires conversion to varying types. + */ @Override public boolean shouldConvertRaggedUnionTypesToVarying() { return true; diff --git a/isthmus/src/main/java/io/substrait/isthmus/TypeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/TypeConverter.java index 932b8f6d8..1e182077d 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/TypeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/TypeConverter.java @@ -20,11 +20,29 @@ import org.apache.calcite.sql.type.SqlTypeName; import org.jspecify.annotations.Nullable; +/** + * Utility for converting between Calcite {@link org.apache.calcite.rel.type.RelDataType} and + * Substrait {@link io.substrait.type.Type}. + * + *

Conversion of user-defined types is delegated to the configured {@link UserTypeMapper}. + * + *

Supports primitive, complex, and user-defined types in both directions. + * + * @see UserTypeMapper + * @see io.substrait.type.Type + * @see org.apache.calcite.rel.type.RelDataType + */ public class TypeConverter { private final UserTypeMapper userTypeMapper; // DEFAULT TypeConverter which does not handle user-defined types + /** + * Default {@link TypeConverter} instance that does not handle user-defined types. + * + *

Both {@link UserTypeMapper#toSubstrait(RelDataType)} and {@link + * UserTypeMapper#toCalcite(Type.UserDefined)} return {@code null} in this default configuration. + */ public static TypeConverter DEFAULT = new TypeConverter( new UserTypeMapper() { @@ -41,14 +59,39 @@ public RelDataType toCalcite(Type.UserDefined type) { } }); + /** + * Creates a {@link TypeConverter} with a provided user type mapper. + * + * @param userTypeMapper Mapper for converting user-defined types between Calcite and Substrait. + */ public TypeConverter(UserTypeMapper userTypeMapper) { this.userTypeMapper = userTypeMapper; } + /** + * Converts a Calcite {@link RelDataType} to a Substrait {@link Type}. + * + * @param type Calcite type to convert. + * @return Corresponding Substrait type. + * @throws UnsupportedOperationException if the type cannot be converted or has unsupported + * properties. + */ public Type toSubstrait(RelDataType type) { return toSubstrait(type, new ArrayList<>()); } + /** + * Converts a Calcite {@link RelDataType} of SQL type {@link SqlTypeName#ROW} to a Substrait + * {@link NamedStruct}. + * + *

Field names are extracted from the Calcite struct type and paired with the converted + * Substrait struct. + * + * @param type Calcite struct type ({@link SqlTypeName#ROW}). + * @return Substrait {@link NamedStruct} containing field names and struct type. + * @throws IllegalArgumentException if {@code type} is not a struct ({@code ROW}). + * @throws UnsupportedOperationException if any child field type cannot be converted. + */ public NamedStruct toNamedStruct(RelDataType type) { if (type.getSqlTypeName() != SqlTypeName.ROW) { throw new IllegalArgumentException("Expected type of struct."); @@ -153,11 +196,31 @@ private Type toSubstrait(RelDataType type, List names) { } } + /** + * Converts a Substrait {@link TypeExpression} to a Calcite {@link RelDataType}. + * + * @param relDataTypeFactory Calcite type factory. + * @param typeExpression Substrait type expression to convert. + * @return Calcite relational type. + * @throws UnsupportedOperationException if the expression contains unsupported precision or + * user-defined types cannot be mapped. + */ public RelDataType toCalcite( RelDataTypeFactory relDataTypeFactory, TypeExpression typeExpression) { return toCalcite(relDataTypeFactory, typeExpression, null); } + /** + * Converts a Substrait {@link TypeExpression} to a Calcite {@link RelDataType}, with optional + * field names for DFS/nested structs. + * + * @param relDataTypeFactory Calcite type factory. + * @param typeExpression Substrait type expression to convert. + * @param dfsFieldNames Optional list of field names to apply to struct fields, in DFS order. + * @return Calcite relational type. + * @throws UnsupportedOperationException if the expression contains unsupported precision or + * user-defined types cannot be mapped. 
+ */ public RelDataType toCalcite( RelDataTypeFactory relDataTypeFactory, TypeExpression typeExpression, diff --git a/isthmus/src/main/java/io/substrait/isthmus/Utils.java b/isthmus/src/main/java/io/substrait/isthmus/Utils.java index 3382007f1..9804954eb 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/Utils.java +++ b/isthmus/src/main/java/io/substrait/isthmus/Utils.java @@ -12,6 +12,11 @@ import org.apache.calcite.jdbc.CalciteSchema; import org.jspecify.annotations.NonNull; +/** + * Utility helpers for Substrait conversions and Calcite schema management. + * + *

Includes helpers for computing cartesian products and building hierarchical Calcite schemas. + */ public class Utils { /** * Compute the cartesian product for n lists. @@ -19,6 +24,12 @@ public class Utils { *

Based on Soln by * Thomas Preissler + * + * @param <T> element type contained within each list. + * @param lists A list of lists whose cross product is computed. Null or empty inner lists are + * skipped. + * @return A stream of lists representing the cartesian product (each output list has one element + * from each input list), or an empty stream if {@code lists} is empty. */ public static <T> Stream<List<T>> crossProduct(List<List<T>> lists) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitOperatorTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitOperatorTable.java index 259ae5b1c..8d0d93fd2 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitOperatorTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitOperatorTable.java @@ -20,11 +20,17 @@ import org.jspecify.annotations.Nullable; /** - * Overrides SQL operator lookups to return Substrait specific functions variants (e.g. {@link - * AggregateFunctions#MAX}} when they are available. + * SQL operator table that prioritizes Substrait-specific operator variants where available, falling + * back to extended library operators and the standard Calcite operator table. + * + *

Overrides lookups to return Substrait variants first (e.g., {@link AggregateFunctions#MAX}), + * ensuring deterministic resolution when multiple implementations exist. + * + * @see SqlOperatorTable */ public class SubstraitOperatorTable implements SqlOperatorTable { + /** Singleton instance of the Substrait operator table. */ public static SubstraitOperatorTable INSTANCE = new SubstraitOperatorTable(); private static final SqlOperatorTable SUBSTRAIT_OPERATOR_TABLE = @@ -69,6 +75,19 @@ public class SubstraitOperatorTable implements SqlOperatorTable { private SubstraitOperatorTable() {} + /** + * Looks up operators by name and syntax, preferring Substrait variants first, then library + * operators, and finally the standard operator table. + * + *

If a Substrait operator match is found, it is returned immediately to avoid ambiguous + * resolution when multiple matches exist. + * + * @param opName The operator name as a {@link SqlIdentifier}. + * @param category Optional {@link SqlFunctionCategory} to narrow the lookup; may be {@code null}. + * @param syntax The {@link SqlSyntax} (e.g., FUNCTION, BINARY, SPECIAL). + * @param operatorList Output list to which matching {@link SqlOperator}s are added. + * @param nameMatcher The {@link SqlNameMatcher} used to match names. + */ @Override public void lookupOperatorOverloads( SqlIdentifier opName, @@ -95,6 +114,12 @@ public void lookupOperatorOverloads( opName, category, syntax, operatorList, nameMatcher); } + /** + * Returns the combined operator list, including Substrait operators, extended library operators, + * and standard operators (excluding kinds overridden by Substrait). + * + * @return Immutable list of all available {@link SqlOperator}s. + */ @Override public List getOperatorList() { return OPERATOR_LIST; diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitSchema.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitSchema.java index 8530fe661..07010dda2 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitSchema.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitSchema.java @@ -6,30 +6,51 @@ import org.apache.calcite.schema.Table; import org.apache.calcite.schema.impl.AbstractSchema; -/** Basic {@link AbstractSchema} implementation */ +/** + * Basic {@link AbstractSchema} implementation for Substrait. + * + *

Provides mappings for tables and subschemas, allowing integration with Calcite's schema + * framework. + */ public class SubstraitSchema extends AbstractSchema { - /** Map of table names to their associated tables */ + /** Map of table names to their associated tables. */ protected final Map tableMap; - /** Map of schema names to their associated schemas */ + /** Map of schema names to their associated schemas. */ protected final Map schemaMap; + /** Creates an empty Substrait schema with no tables or subschemas. */ public SubstraitSchema() { this.tableMap = new HashMap<>(); this.schemaMap = new HashMap<>(); } + /** + * Creates a Substrait schema with the specified table map. + * + * @param tableMap A map of table names to {@link Table} instances. + */ public SubstraitSchema(Map tableMap) { this.tableMap = tableMap; this.schemaMap = new HashMap<>(); } + /** + * Returns the map of table names to tables. + * + * @return A {@link Map} of table names to {@link Table} instances. + */ @Override public Map getTableMap() { return tableMap; } + /** + * Returns the map of schema names to subschemas. + * + * @return A {@link Map} of schema names to {@link Schema} instances. + */ @Override protected Map getSubSchemaMap() { return schemaMap; diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitTable.java index f642c73d8..813e72dba 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitTable.java @@ -4,21 +4,45 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.schema.impl.AbstractTable; -/** Basic {@link AbstractTable} implementation */ +/** + * Basic {@link AbstractTable} implementation for Substrait. + * + *

Represents a table with a fixed name and row type, used for schema integration with Calcite. + */ public class SubstraitTable extends AbstractTable { + /** The row type of the table. */ private final RelDataType rowType; + + /** The name of the table. */ private final String tableName; + /** + * Creates a Substrait table with the given name and row type. + * + * @param tableName The name of the table. + * @param rowType The Calcite {@link RelDataType} representing the table's row type. + */ public SubstraitTable(String tableName, RelDataType rowType) { this.tableName = tableName; this.rowType = rowType; } + /** + * Returns the name of the table. + * + * @return The table name as a {@link String}. + */ public String getName() { return tableName; } + /** + * Returns the row type of the table. + * + * @param typeFactory The Calcite type factory (ignored in this implementation). + * @return The {@link RelDataType} representing the table's row type. + */ @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) { return rowType; diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java index 66a030b8b..50f574028 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java @@ -6,11 +6,23 @@ import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.type.RelDataType; +/** + * Synthetic relational node representing a {@code CREATE TABLE AS SELECT} operation. + * + *

Holds the target table name and the input relation whose schema/data will be used to create + * the table. + */ public class CreateTable extends AbstractRelNode { private final List tableName; private final RelNode input; + /** + * Constructs a {@code CreateTable} node with the given table name and input relation. + * + * @param tableName fully qualified table name parts (e.g., schema and table) + * @param input input relational node supplying schema and data + */ public CreateTable(List tableName, RelNode input) { super(input.getCluster(), input.getTraitSet()); @@ -18,25 +30,51 @@ public CreateTable(List tableName, RelNode input) { this.input = input; } + /** + * Derives the row type from the input relation. + * + * @return the input {@link RelNode}'s row type + */ @Override protected RelDataType deriveRowType() { return input.getRowType(); } + /** + * Explains the node terms for plan output. + * + * @param pw plan writer + * @return the plan writer with this node's fields added + */ @Override public RelWriter explainTerms(RelWriter pw) { return super.explainTerms(pw).input("input", getInput()).item("tableName", getTableName()); } + /** + * Returns the inputs to this node (single input). + * + * @return a list containing the input relation + */ @Override public List getInputs() { return List.of(input); } + /** + * Returns the fully qualified table name parts. + * + * @return table name components (e.g., [schema, table]) + */ public List getTableName() { return tableName; } + /** + * Returns the input relation for the CTAS operation. 
+ * + * @return input {@link RelNode} + */ public RelNode getInput() { return input; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java index ef1e228cb..bffaf0784 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java @@ -6,35 +6,72 @@ import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.type.RelDataType; +/** + * Synthetic relational node representing a {@code CREATE VIEW AS SELECT} operation. + * + *

Stores the view name and the input relation that defines the view. + */ public class CreateView extends AbstractRelNode { private final List viewName; private final RelNode input; + /** + * Constructs a {@code CreateView} node with the given view name and input relation. + * + * @param viewName fully qualified view name parts (e.g., schema and view) + * @param input input relational node defining the view + */ public CreateView(List viewName, RelNode input) { super(input.getCluster(), input.getTraitSet()); this.viewName = viewName; this.input = input; } + /** + * Derives the row type from the input relation. + * + * @return the input {@link RelNode}'s row type + */ @Override protected RelDataType deriveRowType() { return input.getRowType(); } + /** + * Explains the node terms for plan output. + * + * @param pw plan writer + * @return the plan writer with this node's fields added + */ @Override public RelWriter explainTerms(RelWriter pw) { return super.explainTerms(pw).input("input", getInput()).item("viewName", getViewName()); } + /** + * Returns the inputs to this node (single input). + * + * @return a list containing the input relation + */ @Override public List getInputs() { return List.of(input); } + /** + * Returns the fully qualified view name parts. + * + * @return view name components (e.g., [schema, view]) + */ public List getViewName() { return viewName; } + /** + * Returns the input relation for the view definition. 
+ * + * @return input {@link RelNode} + */ public RelNode getInput() { return input; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java index 6a237b366..c963f027b 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java @@ -12,10 +12,22 @@ import org.apache.calcite.sql.util.SqlBasicVisitor; import org.apache.calcite.sql2rel.SqlToRelConverter; +/** + * Visitor that converts DDL {@link SqlCall}s to {@link RelRoot}, delegating to specific handlers + * for supported statements (CREATE TABLE AS SELECT, CREATE VIEW). + * + *

Non-DDL statements are passed through to {@link SqlToRelConverter#convertQuery(SqlNode, + * boolean, boolean)}. + */ public class DdlSqlToRelConverter extends SqlBasicVisitor { + /** + * Registry mapping DDL {@link SqlCall} classes to handler functions that convert them into {@link + * RelRoot} instances. + */ protected final Map, Function> ddlHandlers = new ConcurrentHashMap<>(); + private final SqlToRelConverter converter; private Function findDdlHandler(final SqlCall call) { @@ -30,6 +42,11 @@ private Function findDdlHandler(final SqlCall call) { return null; } + /** + * Creates a DDL SQL-to-Rel converter using the given {@link SqlToRelConverter}. + * + * @param converter the converter used for non-DDL and query parts of DDL (e.g., CTAS) + */ public DdlSqlToRelConverter(SqlToRelConverter converter) { this.converter = converter; @@ -37,6 +54,12 @@ public DdlSqlToRelConverter(SqlToRelConverter converter) { ddlHandlers.put(SqlCreateView.class, sqlCall -> handleCreateView((SqlCreateView) sqlCall)); } + /** + * Dispatches a {@link SqlCall} to an appropriate DDL handler; falls back to non-DDL handling. + * + * @param sqlCall the SQL call node + * @return the converted relational root + */ @Override public RelRoot visit(SqlCall sqlCall) { Function ddlHandler = findDdlHandler(sqlCall); @@ -46,10 +69,23 @@ public RelRoot visit(SqlCall sqlCall) { return handleNonDdl(sqlCall); } + /** + * Handles non-DDL SQL nodes via the underlying {@link SqlToRelConverter}. + * + * @param sqlNode the SQL node to convert + * @return the converted relational root + */ protected RelRoot handleNonDdl(final SqlNode sqlNode) { return converter.convertQuery(sqlNode, true, true); } + /** + * Handles {@code CREATE TABLE AS SELECT} statements. 
+ * + * @param sqlCreateTable the CREATE TABLE node + * @return a {@link RelRoot} wrapping a synthetic {@code CreateTable} relational node + * @throws IllegalArgumentException if the statement is not CTAS + */ protected RelRoot handleCreateTable(final SqlCreateTable sqlCreateTable) { if (sqlCreateTable.query == null) { throw new IllegalArgumentException("Only create table as select statements are supported"); @@ -58,6 +94,13 @@ protected RelRoot handleCreateTable(final SqlCreateTable sqlCreateTable) { return RelRoot.of(new CreateTable(sqlCreateTable.name.names, input), sqlCreateTable.getKind()); } + /** + * Handles {@code CREATE VIEW} statements. + * + * @param sqlCreateView the CREATE VIEW node + * @return a {@link RelRoot} wrapping a synthetic {@code CreateTable} relational node representing + * the view definition + */ protected RelRoot handleCreateView(final SqlCreateView sqlCreateView) { final RelNode input = converter.convertQuery(sqlCreateView.query, true, true).rel; return RelRoot.of(new CreateTable(sqlCreateView.name.names, input), sqlCreateView.getKind()); diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/AggregateFunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/AggregateFunctionConverter.java index 8d81b0b00..6f8918179 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/AggregateFunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/AggregateFunctionConverter.java @@ -24,22 +24,47 @@ import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +/** + * Converts Calcite {@link AggregateCall} instances into Substrait aggregate {@link + * AggregateFunctionInvocation}s using configured function variants and signatures. + * + *

Handles special cases (e.g., approximate distinct count) and collation/sort fields. + */ public class AggregateFunctionConverter extends FunctionConverter< SimpleExtension.AggregateFunctionVariant, AggregateFunctionInvocation, AggregateFunctionConverter.WrappedAggregateCall> { + /** + * Returns the supported aggregate signatures used for matching functions. + * + * @return immutable list of aggregate signatures + */ @Override protected ImmutableList getSigs() { return FunctionMappings.AGGREGATE_SIGS; } + /** + * Creates a converter with the given function variants and type factory. + * + * @param functions available aggregate function variants + * @param typeFactory Calcite type factory + */ public AggregateFunctionConverter( List functions, RelDataTypeFactory typeFactory) { super(functions, typeFactory); } + /** + * Creates a converter with additional signatures and a type converter. + * + * @param functions available aggregate function variants + * @param additionalSignatures extra signatures to consider + * @param typeFactory Calcite type factory + * @param typeConverter Substrait type converter + */ public AggregateFunctionConverter( List functions, List additionalSignatures, @@ -48,6 +73,15 @@ public AggregateFunctionConverter( super(functions, additionalSignatures, typeFactory, typeConverter); } + /** + * Builds a Substrait aggregate invocation from the matched call and arguments. + * + * @param call wrapped aggregate call + * @param function matched Substrait function variant + * @param arguments converted arguments + * @param outputType result type of the invocation + * @return aggregate function invocation + */ @Override protected AggregateFunctionInvocation generateBinding( WrappedAggregateCall call, @@ -75,6 +109,15 @@ protected AggregateFunctionInvocation generateBinding( arguments); } + /** + * Attempts to convert a Calcite aggregate call to a Substrait invocation. 
+ * + * @param input input relational node + * @param inputType Substrait input struct type + * @param call Calcite aggregate call + * @param topLevelConverter converter for RexNodes to Expressions + * @return optional Substrait aggregate invocation + */ public Optional convert( RelNode input, Type.Struct inputType, @@ -93,6 +136,12 @@ public Optional convert( return m.attemptMatch(wrapped, topLevelConverter); } + /** + * Resolves the appropriate function finder, applying Substrait-specific variants when needed. + * + * @param call Calcite aggregate call + * @return function finder for the resolved aggregate function, or {@code null} if none + */ protected FunctionFinder getFunctionFinder(AggregateCall call) { // replace COUNT() + distinct == true and approximate == true with APPROX_COUNT_DISTINCT // before converting into substrait function @@ -108,12 +157,21 @@ protected FunctionFinder getFunctionFinder(AggregateCall call) { return signatures.get(lookupFunction); } + /** Lightweight wrapper around {@link AggregateCall} providing operands and type access. */ static class WrappedAggregateCall implements FunctionConverter.GenericCall { private final AggregateCall call; private final RelNode input; private final RexBuilder rexBuilder; private final Type.Struct inputType; + /** + * Creates a new wrapped aggregate call. + * + * @param call underlying Calcite aggregate call + * @param input input relational node + * @param rexBuilder Rex builder for operand construction + * @param inputType Substrait input struct type + */ private WrappedAggregateCall( AggregateCall call, RelNode input, RexBuilder rexBuilder, Type.Struct inputType) { this.call = call; @@ -122,15 +180,30 @@ private WrappedAggregateCall( this.inputType = inputType; } + /** + * Returns operands as input references over the argument list. 
+ * + * @return stream of RexNode operands + */ @Override public Stream getOperands() { return call.getArgList().stream().map(r -> rexBuilder.makeInputRef(input, r)); } + /** + * Exposes the underlying Calcite aggregate call. + * + * @return the aggregate call + */ public AggregateCall getUnderlying() { return call; } + /** + * Returns the type of the aggregate call result. + * + * @return Calcite result type + */ @Override public RelDataType getType() { return call.getType(); diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/CallConverters.java b/isthmus/src/main/java/io/substrait/isthmus/expression/CallConverters.java index 247d4b9ce..68d4b7f6b 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/CallConverters.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/CallConverters.java @@ -17,8 +17,24 @@ import org.apache.calcite.sql.SqlKind; import org.jspecify.annotations.Nullable; +/** + * Collection of small, composable {@link CallConverter}s for common Calcite {@link RexCall}s (e.g., + * CAST, CASE, REINTERPRET, SEARCH). Each converter returns a Substrait {@link Expression} or {@code + * null} when the call is not handled. + * + *

Use {@link #defaults(TypeConverter)} to get a standard set. + */ public class CallConverters { + /** + * Converter for {@link SqlKind#CAST} and {@link SqlKind#SAFE_CAST} to Substrait {@link + * Expression.Cast}. + * + *

On SAFE_CAST, sets {@link Expression.FailureBehavior#RETURN_NULL}; otherwise + * THROW_EXCEPTION. + * + * @see ExpressionCreator#cast(Type, Expression, Expression.FailureBehavior) + */ public static Function CAST = typeConverter -> (call, visitor) -> { @@ -53,6 +69,8 @@ public class CallConverters { * *

When converting from Calcite to Substrait, this call converter extracts the {@link * Expression.UserDefinedLiteral} that was stored. + * + * @see Expression.UserDefinedLiteral */ public static Function REINTERPRET = typeConverter -> @@ -89,7 +107,7 @@ public class CallConverters { // return null; // }; // } - /** */ + /** Converter for {@link SqlKind#CASE} to Substrait {@link Expression.IfThen}. */ public static SimpleCallConverter CASE = (call, visitor) -> { if (call.getKind() != SqlKind.CASE) { @@ -122,6 +140,9 @@ public class CallConverters { * Expand {@link org.apache.calcite.util.Sarg} values in a calcite `SqlSearchOperator` into * simpler expressions. The expansion logic is encoded in {@link RexUtil#expandSearch(RexBuilder, * RexProgram, RexNode)} + * + *

Returns a factory of {@link SimpleCallConverter} that expands SEARCH calls using the + * provided {@link RexBuilder}. */ public static Function CREATE_SEARCH_CONV = (RexBuilder rexBuilder) -> @@ -135,6 +156,12 @@ public class CallConverters { } }; + /** + * Returns the default set of converters for common calls. + * + * @param typeConverter type mapper between Substrait and Calcite types + * @return list of default {@link CallConverter}s + */ public static List defaults(TypeConverter typeConverter) { return ImmutableList.of( new FieldSelectionConverter(typeConverter), @@ -145,10 +172,27 @@ public static List defaults(TypeConverter typeConverter) { new SqlMapValueConstructorCallConverter()); } + /** Single-method {@link CallConverter} variant that returns {@code null} for unhandled calls. */ public interface SimpleCallConverter extends CallConverter { + /** + * Converts a given {@link RexCall} to a Substrait {@link Expression}, or returns {@code null} + * if not handled. + * + * @param call the Calcite call to convert + * @param topLevelConverter converter for nested {@link RexNode} operands + * @return converted expression, or {@code null} if not applicable + */ @Nullable Expression apply(RexCall call, Function topLevelConverter); + /** + * Default adapter to {@link CallConverter#convert(RexCall, Function)} returning {@link + * Optional#empty()} when {@link #apply(RexCall, Function)} returns {@code null}. 
+ * + * @param call the Calcite call to convert + * @param topLevelConverter converter for nested {@link RexNode} operands + * @return optional converted expression + */ @Override default Optional convert( RexCall call, Function topLevelConverter) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/ExpressionRexConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/ExpressionRexConverter.java index 6c0c5ee7a..be97a76d8 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/ExpressionRexConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/ExpressionRexConverter.java @@ -80,13 +80,34 @@ public class ExpressionRexConverter private static final long MILLIS_IN_DAY = TimeUnit.DAYS.toMillis(1); + /** Calcite {@link RelDataTypeFactory} used for creating and managing relational types. */ protected final RelDataTypeFactory typeFactory; + + /** Converter for mapping between Substrait and Calcite types. */ protected final TypeConverter typeConverter; + + /** Calcite {@link RexBuilder} for constructing {@link org.apache.calcite.rex.RexNode}s. */ protected final RexBuilder rexBuilder; + + /** Converter for Substrait scalar function invocations to Calcite {@link SqlOperator}s. */ protected final ScalarFunctionConverter scalarFunctionConverter; + + /** Converter for Substrait window function invocations to Calcite {@link SqlOperator}s. */ protected final WindowFunctionConverter windowFunctionConverter; + + /** Converter for Substrait relational nodes to Calcite {@link RelNode}s, used for subqueries. */ protected SubstraitRelNodeConverter relNodeConverter; + /** + * Creates an {@code ExpressionRexConverter} for converting Substrait expressions to Calcite Rex + * nodes. 
+ * + * @param typeFactory Calcite {@link org.apache.calcite.rel.type.RelDataTypeFactory} for type + * creation + * @param scalarFunctionConverter converter for scalar function invocations + * @param windowFunctionConverter converter for window function invocations + * @param typeConverter converter for Substrait ↔ Calcite type mappings + */ public ExpressionRexConverter( RelDataTypeFactory typeFactory, ScalarFunctionConverter scalarFunctionConverter, @@ -99,6 +120,11 @@ public ExpressionRexConverter( this.windowFunctionConverter = windowFunctionConverter; } + /** + * Sets the {@link SubstraitRelNodeConverter} used for converting subqueries. + * + * @param substraitRelNodeConverter converter for Substrait relational nodes + */ public void setRelNodeConverter(final SubstraitRelNodeConverter substraitRelNodeConverter) { this.relNodeConverter = substraitRelNodeConverter; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FieldSelectionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FieldSelectionConverter.java index 14d3b8dfa..4893d1ef5 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FieldSelectionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FieldSelectionConverter.java @@ -15,17 +15,42 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** Converts field selections from Calcite representation. */ +/** + * Converts Calcite {@link RexCall} ITEM operators into Substrait {@link FieldReference} + * expressions. + * + *

Handles dereferencing of ROW, ARRAY, and MAP types using literal indices or keys. + */ public class FieldSelectionConverter implements CallConverter { private static final Logger LOGGER = LoggerFactory.getLogger(FieldSelectionConverter.class); private final TypeConverter typeConverter; + /** + * Creates a converter for field selection operations. + * + * @param typeConverter converter for Substrait ↔ Calcite type mappings + */ public FieldSelectionConverter(TypeConverter typeConverter) { super(); this.typeConverter = typeConverter; } + /** + * Converts a Calcite ITEM operator into a Substrait {@link FieldReference}, if applicable. + * + *

Supports: + * + *

    + *
  • ROW dereference by integer index + *
  • ARRAY dereference by integer index + *
  • MAP dereference by string key + *
+ * + * @param call the Calcite ITEM operator call + * @param topLevelConverter function to convert nested operands + * @return an {@link Optional} containing the converted expression, or empty if not applicable + */ @Override public Optional convert( RexCall call, Function topLevelConverter) { @@ -96,6 +121,12 @@ public Optional convert( return Optional.empty(); } + /** + * Converts a numeric literal to an integer index. + * + * @param l literal to convert + * @return optional integer value, empty if not numeric + */ private Optional toInt(Expression.Literal l) { if (l instanceof Expression.I8Literal) { return Optional.of(((Expression.I8Literal) l).value()); @@ -110,6 +141,12 @@ private Optional toInt(Expression.Literal l) { return Optional.empty(); } + /** + * Converts a fixed-char literal to a string key. + * + * @param l literal to convert + * @return optional string value, empty if not a fixed-char literal + */ public Optional toString(Expression.Literal l) { if (!(l instanceof Expression.FixedCharLiteral)) { LOGGER.atWarn().log("Literal expected to be char type but was not. {}", l); diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java index b5604d4d9..422288f94 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionConverter.java @@ -17,7 +17,6 @@ import io.substrait.isthmus.TypeConverter; import io.substrait.isthmus.Utils; import io.substrait.isthmus.expression.FunctionMappings.Sig; -import io.substrait.isthmus.expression.FunctionMappings.TypeBasedResolver; import io.substrait.type.Type; import io.substrait.util.Util; import java.util.ArrayList; @@ -49,60 +48,63 @@ * Abstract base class for converting between Calcite {@link SqlOperator}s and Substrait function * invocations. * - *

This class handles bidirectional conversion: + *

Supports Calcite → Substrait conversion via signature matching/coercion and Substrait → + * Calcite lookup via function keys. * - *

    - *
  • Calcite → Substrait: Subclasses implement {@code convert()} methods to convert - * Calcite calls to Substrait function invocations - *
  • Substrait → Calcite: {@link #getSqlOperatorFromSubstraitFunc} converts Substrait - * function keys to Calcite {@link SqlOperator}s - *
- * - *

When multiple functions with the same name and signature are passed into the constructor, a - * last-wins precedence strategy is used for resolution. The last function in the input list - * takes precedence during Calcite to Substrait conversion. - * - * @param <F> the function type (ScalarFunctionVariant, AggregateFunctionVariant, etc.) - * @param <T> the return type for Calcite→Substrait conversion - * @param <C> the call type being converted + * @param <F> function variant type (e.g., ScalarFunctionVariant, AggregateFunctionVariant) + * @param <T> return type produced when binding Substrait invocations + * @param <C> generic call wrapper exposing operands and type */ public abstract class FunctionConverter< F extends SimpleExtension.Function, T, C extends FunctionConverter.GenericCall> { private static final Logger LOGGER = LoggerFactory.getLogger(FunctionConverter.class); + /** + * Maps Calcite {@link SqlOperator}s to {@link FunctionFinder}s for signature-based matching. Used + * to locate Substrait functions based on Calcite calls and operand shapes. + */ protected final Map signatures; + + /** Calcite {@link RelDataTypeFactory} used for creating and inspecting relational types. */ protected final RelDataTypeFactory typeFactory; + + /** Converter handling Substrait ↔ Calcite type mappings and nullability rules. */ protected final TypeConverter typeConverter; + + /** + * Calcite {@link org.apache.calcite.rex.RexBuilder} for constructing {@link + * org.apache.calcite.rex.RexNode}s. + */ protected final RexBuilder rexBuilder; + /** + * Multimap from Substrait function key (name plus type signature, e.g., {@code concat:str_str}) to Calcite {@link SqlOperator}s. + * Enables reverse lookup when converting Substrait function invocations to Calcite operators. + */ protected final Multimap substraitFuncKeyToSqlOperatorMap; /** - * Creates a FunctionConverter with the given functions. + * Creates a converter with the given functions. * - *

If there are multiple functions provided with the same name and signature (e.g., from - * different extension URNs), the last one in the list will be given precedence during Calcite to - * Substrait conversion. + *

Last-wins precedence applies when multiple variants share the same name/signature. * - * @param functions the list of function variants to register - * @param typeFactory the Calcite type factory + * @param functions function variants to register + * @param typeFactory Calcite type factory */ public FunctionConverter(List functions, RelDataTypeFactory typeFactory) { this(functions, Collections.EMPTY_LIST, typeFactory, TypeConverter.DEFAULT); } /** - * Creates a FunctionConverter with the given functions and additional signatures. + * Creates a converter with functions and additional operator signatures. * - *

If there are multiple functions provided with the same name and signature (e.g., from - * different extension URNs), the last one in the list will be given precedence during Calcite to - * Substrait conversion. + *

Last-wins precedence applies when multiple variants share the same name/signature. * - * @param functions the list of function variants to register - * @param additionalSignatures additional Calcite operator signatures to map - * @param typeFactory the Calcite type factory - * @param typeConverter the type converter to use + * @param functions function variants to register + * @param additionalSignatures extra Calcite operator signatures to map + * @param typeFactory Calcite type factory + * @param typeConverter type converter to Substrait */ public FunctionConverter( List functions, @@ -155,18 +157,14 @@ public FunctionConverter( } /** - * Converts a Substrait function to a Calcite {@link SqlOperator} (Substrait → Calcite direction). - * - *

Given a Substrait function key (e.g., "concat:str_str") and output type, this method finds - * the corresponding Calcite {@link SqlOperator}. When multiple operators match, the output type - * is used to disambiguate. + * Resolves a Calcite {@link SqlOperator} from a Substrait function key (Substrait → Calcite). * - * @param key the Substrait function key (function name with type signature) - * @param outputType the expected output type - * @return the matching {@link SqlOperator}, or empty if no match found + * @param key Substrait function key (e.g., {@code concat:str_str}) + * @param outputType expected Substrait output type used for disambiguation + * @return matching {@link SqlOperator}, or empty if none */ public Optional getSqlOperatorFromSubstraitFunc(String key, Type outputType) { - Map resolver = getTypeBasedResolver(); + Map resolver = getTypeBasedResolver(); Collection operators = substraitFuncKeyToSqlOperatorMap.get(key); if (operators.isEmpty()) { return Optional.empty(); @@ -198,12 +196,30 @@ public Optional getSqlOperatorFromSubstraitFunc(String key, Type ou return Optional.empty(); } + /** + * Returns the resolver used to disambiguate Calcite operators by output type. + * + * @return map from {@link SqlOperator} to type-based resolver + */ private Map getTypeBasedResolver() { return FunctionMappings.OPERATOR_RESOLVER; } + /** + * Provides the set of Calcite operator signatures supported by this converter. + * + * @return immutable list of supported signatures + */ protected abstract ImmutableList getSigs(); + /** + * Helper class for locating and matching Calcite {@link org.apache.calcite.sql.SqlOperator} + * signatures to Substrait functions. + * + *

Used during expression conversion to determine if a given {@link + * org.apache.calcite.rex.RexCall} corresponds to a known Substrait function and to validate + * argument counts. + */ protected class FunctionFinder { private final String substraitName; private final SqlOperator operator; @@ -212,6 +228,13 @@ protected class FunctionFinder { private final Optional> singularInputType; private final Util.IntRange argRange; + /** + * Creates a function finder for a Substrait name/operator over given variants. + * + * @param substraitName canonical Substrait function name + * @param operator Calcite operator being matched + * @param functions registered function variants for this name + */ public FunctionFinder(String substraitName, SqlOperator operator, List functions) { this.substraitName = substraitName; this.operator = operator; @@ -232,10 +255,23 @@ public FunctionFinder(String substraitName, SqlOperator operator, List functi this.directMap = directMap.build(); } + /** + * Returns whether the given argument count is within this operator's allowed range. + * + * @param count number of operands + * @return {@code true} if allowed; otherwise {@code false} + */ public boolean allowedArgCount(int count) { return argRange.within(count); } + /** + * Attempts an exact signature match against required arguments and return type. + * + * @param inputTypes operand types (Substrait) + * @param outputType expected output type (Substrait) + * @return matching function variant if found; otherwise empty + */ private Optional signatureMatch(List inputTypes, Type outputType) { for (F function : functions) { List args = function.requiredArguments(); @@ -251,17 +287,13 @@ && inputTypesMatchDefinedArguments(inputTypes, args)) { } /** - * Checks to see if the given input types satisfy the function arguments given. Checks that + * Checks that input types satisfy the function's required arguments. * - *

    - *
  • Variadic arguments all have the same input type - *
  • Matched wildcard arguments (i.e.`any`, `any1`, `any2`, etc) all have the same input - * type - *
+ *

Ensures variadic arguments share a type and matched wildcards (anyN) are consistent. * - * @param inputTypes input types to check against arguments + * @param inputTypes operand types to verify * @param args expected arguments as defined in a {@link SimpleExtension.Function} - * @return true if the {@code inputTypes} satisfy the {@code args}, false otherwise + * @return {@code true} if compatible; otherwise {@code false} */ private boolean inputTypesMatchDefinedArguments( List inputTypes, List args) { @@ -296,11 +328,10 @@ private boolean inputTypesMatchDefinedArguments( } /** - * If some of the function variants for this function name have single, repeated argument type, - * we will attempt to find matches using these patterns and least-restrictive casting. + * Derives singular-argument matchers for variants whose required arguments share one type. * - *

If this exists, the function finder will attempt to find a least-restrictive match using - * these. + * @param functions variants to inspect + * @return optional matcher chain; empty if none */ private Optional> getSingularInputType(List functions) { List> matchers = new ArrayList<>(); @@ -343,6 +374,13 @@ private Optional> getSingularInputType(List functi } } + /** + * Creates a matcher for a single repeated parameter type. + * + * @param function function variant + * @param type repeated parameter type + * @return matcher accepting input/output types + */ private SingularArgumentMatcher singular(F function, ParameterizedType type) { return (inputType, outputType) -> { boolean check = isMatch(inputType, type); @@ -353,6 +391,12 @@ private SingularArgumentMatcher singular(F function, ParameterizedType type) }; } + /** + * Chains multiple singular matchers, returning the first successful match. + * + * @param matchers matchers to try in order + * @return composite matcher + */ private SingularArgumentMatcher chained(List> matchers) { return (inputType, outputType) -> { for (SingularArgumentMatcher s : matchers) { @@ -370,6 +414,13 @@ private SingularArgumentMatcher chained(List> matc * In case of a `RexLiteral` of an Enum value try both `req` and `op` signatures * for that argument position. */ + /** + * Produces candidate signature keys considering enum literals as required/optional. + * + * @param rexOperands operand RexNodes + * @param opTypes operand type strings (Substrait) + * @return stream of candidate key suffixes to test + */ private Stream matchKeys(List rexOperands, List opTypes) { assert (rexOperands.size() == opTypes.size()); @@ -396,17 +447,13 @@ private Stream matchKeys(List rexOperands, List opTypes } /** - * Converts a Calcite call to a Substrait function invocation (Calcite → Substrait direction). + * Converts a Calcite call to a Substrait function invocation (Calcite → Substrait). * - *

This method tries to find a matching Substrait function for the given Calcite call using - * direct signature matching, type coercion, and least-restrictive type resolution. + *

Tries direct signature match, then coercion, then least-restrictive type resolution. * - *

If multiple registered function extensions have the same name and signature, the last one - * in the list passed into the constructor will be matched. - * - * @param call the Calcite call to match - * @param topLevelConverter function to convert RexNode operands to Substrait Expressions - * @return the matched Substrait function binding, or empty if no match found + * @param call generic call wrapper (operands and type) + * @param topLevelConverter converter from {@link RexNode} to Substrait {@link Expression} + * @return matched binding, or empty if none */ public Optional attemptMatch(C call, Function topLevelConverter) { @@ -480,6 +527,14 @@ public Optional attemptMatch(C call, Function topLevelCo return Optional.empty(); } + /** + * Tries matching using Calcite's least-restrictive type for operands. + * + * @param call generic call wrapper + * @param outputType expected output type (Substrait) + * @param operands converted operand expressions + * @return binding if a singular-type variant matches; otherwise empty + */ private Optional matchByLeastRestrictive( C call, Type outputType, List operands) { RelDataType leastRestrictive = @@ -499,6 +554,14 @@ private Optional matchByLeastRestrictive( }); } + /** + * Tries matching by coercing each operand to its Substrait type and checking signatures. + * + * @param call generic call wrapper + * @param outputType expected output type (Substrait) + * @param expressions operand expressions + * @return binding if a signature match is found; otherwise empty + */ private Optional matchCoerced(C call, Type outputType, List expressions) { // Convert the operands to the proper Substrait type List operandTypes = @@ -520,29 +583,64 @@ private Optional matchCoerced(C call, Type outputType, List expre return Optional.of(generateBinding(call, matchFunction.get(), coercedArgs, outputType)); } + /** + * Returns the canonical Substrait name this finder resolves. 
+ * + * @return Substrait function name + */ protected String getSubstraitName() { return substraitName; } + /** + * Returns the Calcite operator associated with this finder. + * + * @return Calcite operator + */ public SqlOperator getOperator() { return operator; } } + /** + * Represents a generic function or operator call abstraction used during expression conversion. + * + *

Provides access to the operands and the resulting Calcite type of the call. + */ public interface GenericCall { + /** + * Returns the operand stream for this call. + * + * @return stream of {@link RexNode} operands + */ Stream getOperands(); + /** + * Returns the Calcite result type of the call. + * + * @return {@link RelDataType} for the call + */ RelDataType getType(); } /** - * Coerced types according to an expected output type. Coercion is only done for type mismatches, - * not for nullability or parameter mismatches. + * Coerces arguments to the target type when mismatched (ignores nullability/parameters). + * + * @param arguments input expressions + * @param targetType target Substrait type + * @return list of coerced expressions (casts applied as needed) */ private static List coerceArguments(List arguments, Type targetType) { return arguments.stream().map(a -> coerceArgument(a, targetType)).collect(Collectors.toList()); } + /** + * Coerces a single expression to the target type, if needed. + * + * @param argument expression to coerce + * @param type target Substrait type + * @return original expression or casted expression + */ private static Expression coerceArgument(Expression argument, Type type) { if (isMatch(type, argument.getType())) { return argument; @@ -551,14 +649,43 @@ private static Expression coerceArgument(Expression argument, Type type) { return ExpressionCreator.cast(type, argument, Expression.FailureBehavior.THROW_EXCEPTION); } + /** + * Creates the Substrait binding for a matched function variant. + * + * @param call generic call wrapper (operands and type) + * @param function matched extension function variant + * @param arguments converted function arguments + * @param outputType expected Substrait output type + * @return binding to return to the caller + */ protected abstract T generateBinding( C call, F function, List arguments, Type outputType); + /** + * Matcher for functions whose required arguments share a single repeated type. 
+ * + * @param function variant type + */ @FunctionalInterface private interface SingularArgumentMatcher { + /** + * Attempts a match for the provided input/output types. + * + * @param type singular input type + * @param outputType expected output type + * @return matching function if successful; otherwise empty + */ Optional tryMatch(Type type, Type outputType); } + /** + * Compares parameterized types, allowing wildcards and ignoring nullability/parameters when + * appropriate. + * + * @param actualType actual parameterized type + * @param targetType target parameterized type + * @return {@code true} if compatible; otherwise {@code false} + */ private static boolean isMatch(ParameterizedType actualType, ParameterizedType targetType) { if (targetType.isWildcard()) { return true; diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java index 8bb41ff39..ef9816bb1 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java @@ -9,10 +9,17 @@ import org.apache.calcite.sql.fun.SqlLibraryOperators; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +/** + * Defines static mappings between Calcite {@link SqlOperator} signatures and Substrait base + * function names, including scalar, aggregate, and window function signatures. + * + *

Also provides type-based resolvers to disambiguate operators by output type. + */ public class FunctionMappings { // Static list of signature mapping between Calcite SQL operators and Substrait base function // names. + /** Scalar operator signatures mapped to Substrait function names. */ public static final ImmutableList SCALAR_SIGS = ImmutableList.builder() .add( @@ -100,6 +107,7 @@ public class FunctionMappings { s(SqlLibraryOperators.RPAD, "rpad")) .build(); + /** Aggregate operator signatures mapped to Substrait function names. */ public static final ImmutableList AGGREGATE_SIGS = ImmutableList.builder() .add( @@ -112,6 +120,7 @@ public class FunctionMappings { s(AggregateFunctions.AVG, "avg")) .build(); + /** Window function signatures (including supported aggregates) mapped to Substrait names. */ public static final ImmutableList WINDOW_SIGS = ImmutableList.builder() .add( @@ -131,6 +140,7 @@ public class FunctionMappings { .build(); // contains return-type based resolver for both scalar and aggregator operator + /** Type-based resolvers to disambiguate Calcite operators by expected output type. */ public static final Map OPERATOR_RESOLVER = Map.of( SqlStdOperatorTable.PLUS, @@ -146,54 +156,120 @@ public class FunctionMappings { SqlStdOperatorTable.BIT_LEFT_SHIFT, resolver(SqlStdOperatorTable.BIT_LEFT_SHIFT, Set.of("i8", "i16", "i32", "i64"))); + /** + * Prints all scalar signatures (for quick inspection). + * + * @param args CLI arguments (unused) + */ public static void main(String[] args) { SCALAR_SIGS.forEach(System.out::println); } + /** + * Creates a signature mapping entry. + * + * @param operator the Calcite operator + * @param substraitName the Substrait base function name + * @return a {@link Sig} instance + */ public static Sig s(SqlOperator operator, String substraitName) { return new Sig(operator, substraitName.toLowerCase(Locale.ROOT)); } + /** + * Creates a signature mapping entry using the operator's own (lowercased) name. 
+ * + * @param operator the Calcite operator + * @return a {@link Sig} instance + */ public static Sig s(SqlOperator operator) { return s(operator, operator.getName().toLowerCase(Locale.ROOT)); } + /** Simple signature tuple of operator to Substrait name. */ public static class Sig { + + /** The Calcite operator of this signature entry. */ public final SqlOperator operator; + + /** The Substrait function name mapped to the operator. */ public final String name; + /** + * Constructs a signature entry. + * + * @param operator the Calcite operator + * @param name the Substrait function name + */ public Sig(final SqlOperator operator, final String name) { this.operator = operator; this.name = name; } + /** + * Returns the Substrait function name. + * + * @return the Substrait name + */ public String name() { return name; } + /** + * Returns the Calcite operator. + * + * @return the operator + */ public SqlOperator operator() { return operator; } } + /** + * Creates a type-based resolver for an operator. + * + * @param operator the Calcite operator + * @param outTypes the set of allowed output type strings (Substrait) + * @return a {@link TypeBasedResolver} + */ public static TypeBasedResolver resolver(SqlOperator operator, Set outTypes) { return new TypeBasedResolver(operator, outTypes); } + /** Disambiguates operators based on expected output type strings. */ public static class TypeBasedResolver { + /** The Calcite operator this resolver applies to. */ public final SqlOperator operator; + + /** The allowed output type strings. */ public final Set types; + /** + * Constructs a resolver. + * + * @param operator the Calcite operator + * @param types allowed output type strings + */ public TypeBasedResolver(final SqlOperator operator, final Set types) { this.operator = operator; this.types = types; } + /** + * Returns the operator this resolver applies to. + * + * @return the operator + */ public SqlOperator operator() { return operator; } + /** + * Returns the allowed output type strings. 
+ * + * @return set of type strings + */ public Set types() { return types; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java b/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java index f8b4be1dd..fc95d2f9b 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/IgnoreNullableAndParameters.java @@ -4,232 +4,528 @@ import io.substrait.function.ParameterizedTypeVisitor; import io.substrait.type.Type; +/** + * Visitor that compares a given {@link ParameterizedType} against visited types, ignoring + * nullability and (where applicable) type parameters. + * + *

Intended for structural type compatibility checks where exact parameter values are not + * required. + */ public class IgnoreNullableAndParameters implements ParameterizedTypeVisitor { private final ParameterizedType typeToMatch; + /** + * Creates a visitor that will compare visited types against {@code typeToMatch}. + * + * @param typeToMatch the target type to compare against + */ public IgnoreNullableAndParameters(ParameterizedType typeToMatch) { this.typeToMatch = typeToMatch; } + /** + * Compares {@link Type.Bool} ignoring nullability. + * + * @param type boolean type + * @return {@code true} if {@code typeToMatch} is {@link Type.Bool} + */ @Override public Boolean visit(Type.Bool type) { return typeToMatch instanceof Type.Bool; } + /** + * Compares {@link Type.I8} ignoring nullability. + * + * @param type 8-bit integer type + * @return {@code true} if {@code typeToMatch} is {@link Type.I8} + */ @Override public Boolean visit(Type.I8 type) { return typeToMatch instanceof Type.I8; } + /** + * Compares {@link Type.I16} ignoring nullability. + * + * @param type 16-bit integer type + * @return {@code true} if {@code typeToMatch} is {@link Type.I16} + */ @Override public Boolean visit(Type.I16 type) { return typeToMatch instanceof Type.I16; } + /** + * Compares {@link Type.I32} ignoring nullability. + * + * @param type 32-bit integer type + * @return {@code true} if {@code typeToMatch} is {@link Type.I32} + */ @Override public Boolean visit(Type.I32 type) { return typeToMatch instanceof Type.I32; } + /** + * Compares {@link Type.I64} ignoring nullability. + * + * @param type 64-bit integer type + * @return {@code true} if {@code typeToMatch} is {@link Type.I64} + */ @Override public Boolean visit(Type.I64 type) { return typeToMatch instanceof Type.I64; } + /** + * Compares {@link Type.FP32} ignoring nullability. 
+ * + * @param type 32-bit floating point type + * @return {@code true} if {@code typeToMatch} is {@link Type.FP32} + */ @Override public Boolean visit(Type.FP32 type) { return typeToMatch instanceof Type.FP32; } + /** + * Compares {@link Type.FP64} ignoring nullability. + * + * @param type 64-bit floating point type + * @return {@code true} if {@code typeToMatch} is {@link Type.FP64} + */ @Override public Boolean visit(Type.FP64 type) { return typeToMatch instanceof Type.FP64; } + /** + * Compares {@link Type.Str} ignoring nullability. + * + * @param type string type + * @return {@code true} if {@code typeToMatch} is {@link Type.Str} + */ @Override public Boolean visit(Type.Str type) { return typeToMatch instanceof Type.Str; } + /** + * Compares {@link Type.Binary} ignoring nullability. + * + * @param type binary (var-length) type + * @return {@code true} if {@code typeToMatch} is {@link Type.Binary} + */ @Override public Boolean visit(Type.Binary type) { return typeToMatch instanceof Type.Binary; } + /** + * Compares {@link Type.Date} ignoring nullability. + * + * @param type date type + * @return {@code true} if {@code typeToMatch} is {@link Type.Date} + */ @Override public Boolean visit(Type.Date type) { return typeToMatch instanceof Type.Date; } + /** + * Compares {@link Type.Time} ignoring nullability. + * + * @param type time type + * @return {@code true} if {@code typeToMatch} is {@link Type.Time} + */ @Override public Boolean visit(Type.Time type) { return typeToMatch instanceof Type.Time; } + /** + * Compares {@link Type.TimestampTZ} ignoring nullability. + * + * @param type timestamp-with-time-zone type + * @return {@code true} if {@code typeToMatch} is {@link Type.TimestampTZ} + */ @Override public Boolean visit(Type.TimestampTZ type) { return typeToMatch instanceof Type.TimestampTZ; } + /** + * Compares {@link Type.Timestamp} ignoring nullability. 
+ * + * @param type timestamp type + * @return {@code true} if {@code typeToMatch} is {@link Type.Timestamp} + */ @Override public Boolean visit(Type.Timestamp type) { return typeToMatch instanceof Type.Timestamp; } + /** + * Compares {@link Type.IntervalYear} ignoring nullability. + * + * @param type year-month interval type + * @return {@code true} if {@code typeToMatch} is {@link Type.IntervalYear} + */ @Override public Boolean visit(Type.IntervalYear type) { return typeToMatch instanceof Type.IntervalYear; } + /** + * Compares {@link Type.IntervalDay} ignoring nullability and parameters. + * + * @param type day-time interval type + * @return {@code true} if {@code typeToMatch} is {@link Type.IntervalDay} or {@link + * ParameterizedType.IntervalDay} + */ @Override public Boolean visit(Type.IntervalDay type) { return typeToMatch instanceof Type.IntervalDay || typeToMatch instanceof ParameterizedType.IntervalDay; } + /** + * Compares {@link Type.IntervalCompound} ignoring nullability and parameters. + * + * @param type compound interval type + * @return {@code true} if {@code typeToMatch} is {@link Type.IntervalCompound} or {@link + * ParameterizedType.IntervalCompound} + */ @Override public Boolean visit(Type.IntervalCompound type) { return typeToMatch instanceof Type.IntervalCompound || typeToMatch instanceof ParameterizedType.IntervalCompound; } + /** + * Compares {@link Type.UUID} ignoring nullability. + * + * @param type UUID type + * @return {@code true} if {@code typeToMatch} is {@link Type.UUID} + */ @Override public Boolean visit(Type.UUID type) { return typeToMatch instanceof Type.UUID; } + /** + * Compares {@link Type.UserDefined} for exact equality (URI and name). 
+ * + * @param type user-defined type + * @return {@code true} if {@code typeToMatch} equals {@code type} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(Type.UserDefined type) throws RuntimeException { // Two user-defined types are equal if they have the same uri AND name return typeToMatch.equals(type); } + /** + * Compares {@link Type.FixedChar} ignoring parameters and nullability. + * + * @param type fixed CHAR type + * @return {@code true} if {@code typeToMatch} is {@link Type.FixedChar} or {@link + * ParameterizedType.FixedChar} + */ @Override public Boolean visit(Type.FixedChar type) { return typeToMatch instanceof Type.FixedChar || typeToMatch instanceof ParameterizedType.FixedChar; } + /** + * Compares {@link Type.VarChar} ignoring parameters and nullability. + * + * @param type variable CHAR type + * @return {@code true} if {@code typeToMatch} is {@link Type.VarChar} or {@link + * ParameterizedType.VarChar} + */ @Override public Boolean visit(Type.VarChar type) { return typeToMatch instanceof Type.VarChar || typeToMatch instanceof ParameterizedType.VarChar; } + /** + * Compares {@link Type.FixedBinary} ignoring parameters and nullability. + * + * @param type fixed BINARY type + * @return {@code true} if {@code typeToMatch} is {@link Type.FixedBinary} or {@link + * ParameterizedType.FixedBinary} + */ @Override public Boolean visit(Type.FixedBinary type) { return typeToMatch instanceof Type.FixedBinary || typeToMatch instanceof ParameterizedType.FixedBinary; } + /** + * Compares {@link Type.Decimal} ignoring parameters and nullability. 
+ * + * @param type DECIMAL type + * @return {@code true} if {@code typeToMatch} is {@link Type.Decimal} or {@link + * ParameterizedType.Decimal} + */ @Override public Boolean visit(Type.Decimal type) { return typeToMatch instanceof Type.Decimal || typeToMatch instanceof ParameterizedType.Decimal; } + /** + * Compares {@link Type.PrecisionTime} ignoring parameters and nullability. + * + * @param type precision TIME type + * @return {@code true} if {@code typeToMatch} is {@link Type.PrecisionTime} or {@link + * ParameterizedType.PrecisionTime} + */ @Override public Boolean visit(Type.PrecisionTime type) { return typeToMatch instanceof Type.PrecisionTime || typeToMatch instanceof ParameterizedType.PrecisionTime; } + /** + * Compares {@link Type.PrecisionTimestamp} ignoring parameters and nullability. + * + * @param type precision TIMESTAMP type + * @return {@code true} if {@code typeToMatch} is {@link Type.PrecisionTimestamp} or {@link + * ParameterizedType.PrecisionTimestamp} + */ @Override public Boolean visit(Type.PrecisionTimestamp type) { return typeToMatch instanceof Type.PrecisionTimestamp || typeToMatch instanceof ParameterizedType.PrecisionTimestamp; } + /** + * Compares {@link Type.PrecisionTimestampTZ} ignoring parameters and nullability. + * + * @param type precision TIMESTAMP WITH LOCAL TIME ZONE type + * @return {@code true} if {@code typeToMatch} is {@link Type.PrecisionTimestampTZ} or {@link + * ParameterizedType.PrecisionTimestampTZ} + */ @Override public Boolean visit(Type.PrecisionTimestampTZ type) { return typeToMatch instanceof Type.PrecisionTimestampTZ || typeToMatch instanceof ParameterizedType.PrecisionTimestampTZ; } + /** + * Compares {@link Type.Struct} ignoring parameters and nullability. 
+ * + * @param type STRUCT type + * @return {@code true} if {@code typeToMatch} is {@link Type.Struct} or {@link + * ParameterizedType.Struct} + */ @Override public Boolean visit(Type.Struct type) { return typeToMatch instanceof Type.Struct || typeToMatch instanceof ParameterizedType.Struct; } + /** + * Compares {@link Type.ListType} ignoring parameters and nullability. + * + * @param type LIST type + * @return {@code true} if {@code typeToMatch} is {@link Type.ListType} or {@link + * ParameterizedType.ListType} + */ @Override public Boolean visit(Type.ListType type) { return typeToMatch instanceof Type.ListType || typeToMatch instanceof ParameterizedType.ListType; } + /** + * Compares {@link Type.Map} ignoring parameters and nullability. + * + * @param type MAP type + * @return {@code true} if {@code typeToMatch} is {@link Type.Map} or {@link + * ParameterizedType.Map} + */ @Override public Boolean visit(Type.Map type) { return typeToMatch instanceof Type.Map || typeToMatch instanceof ParameterizedType.Map; } + /** + * Compares parameterized {@link ParameterizedType.FixedChar} ignoring parameters. + * + * @param expr fixed CHAR parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.FixedChar} or {@link + * ParameterizedType.FixedChar} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.FixedChar expr) throws RuntimeException { return typeToMatch instanceof Type.FixedChar || typeToMatch instanceof ParameterizedType.FixedChar; } + /** + * Compares parameterized {@link ParameterizedType.VarChar} ignoring parameters. 
+ * + * @param expr VARCHAR parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.VarChar} or {@link + * ParameterizedType.VarChar} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.VarChar expr) throws RuntimeException { return typeToMatch instanceof Type.VarChar || typeToMatch instanceof ParameterizedType.VarChar; } + /** + * Compares parameterized {@link ParameterizedType.FixedBinary} ignoring parameters. + * + * @param expr fixed BINARY parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.FixedBinary} or {@link + * ParameterizedType.FixedBinary} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.FixedBinary expr) throws RuntimeException { return typeToMatch instanceof Type.FixedBinary || typeToMatch instanceof ParameterizedType.FixedBinary; } + /** + * Compares parameterized {@link ParameterizedType.Decimal} ignoring parameters. + * + * @param expr DECIMAL parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.Decimal} or {@link + * ParameterizedType.Decimal} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.Decimal expr) throws RuntimeException { return typeToMatch instanceof Type.Decimal || typeToMatch instanceof ParameterizedType.Decimal; } + /** + * Compares parameterized {@link ParameterizedType.IntervalDay} ignoring parameters. 
+ * + * @param expr day-time interval parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.IntervalDay} or {@link + * ParameterizedType.IntervalDay} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.IntervalDay expr) throws RuntimeException { return typeToMatch instanceof Type.IntervalDay || typeToMatch instanceof ParameterizedType.IntervalDay; } + /** + * Compares parameterized {@link ParameterizedType.IntervalCompound} ignoring parameters. + * + * @param expr compound interval parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.IntervalCompound} or {@link + * ParameterizedType.IntervalCompound} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.IntervalCompound expr) throws RuntimeException { return typeToMatch instanceof Type.IntervalCompound || typeToMatch instanceof ParameterizedType.IntervalCompound; } + /** + * Compares parameterized {@link ParameterizedType.PrecisionTime} ignoring parameters. + * + * @param expr precision TIME parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.PrecisionTime} or {@link + * ParameterizedType.PrecisionTime} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.PrecisionTime expr) throws RuntimeException { return typeToMatch instanceof Type.PrecisionTime || typeToMatch instanceof ParameterizedType.PrecisionTime; } + /** + * Compares parameterized {@link ParameterizedType.PrecisionTimestamp} ignoring parameters. 
+ * + * @param expr precision TIMESTAMP parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.PrecisionTimestamp} or {@link + * ParameterizedType.PrecisionTimestamp} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.PrecisionTimestamp expr) throws RuntimeException { return typeToMatch instanceof Type.PrecisionTimestamp || typeToMatch instanceof ParameterizedType.PrecisionTimestamp; } + /** + * Compares parameterized {@link ParameterizedType.PrecisionTimestampTZ} ignoring parameters. + * + * @param expr precision TIMESTAMP WITH LOCAL TIME ZONE parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.PrecisionTimestampTZ} or {@link + * ParameterizedType.PrecisionTimestampTZ} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.PrecisionTimestampTZ expr) throws RuntimeException { return typeToMatch instanceof Type.PrecisionTimestampTZ || typeToMatch instanceof ParameterizedType.PrecisionTimestampTZ; } + /** + * Compares parameterized {@link ParameterizedType.Struct} ignoring parameters. + * + * @param expr STRUCT parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.Struct} or {@link + * ParameterizedType.Struct} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.Struct expr) throws RuntimeException { return typeToMatch instanceof Type.Struct || typeToMatch instanceof ParameterizedType.Struct; } + /** + * Compares parameterized {@link ParameterizedType.ListType} ignoring parameters. 
+ * + * @param expr LIST parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.ListType} or {@link + * ParameterizedType.ListType} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.ListType expr) throws RuntimeException { return typeToMatch instanceof Type.ListType || typeToMatch instanceof ParameterizedType.ListType; } + /** + * Compares parameterized {@link ParameterizedType.Map} ignoring parameters. + * + * @param expr MAP parameterized type + * @return {@code true} if {@code typeToMatch} is {@link Type.Map} or {@link + * ParameterizedType.Map} + * @throws RuntimeException if comparison cannot be performed + */ @Override public Boolean visit(ParameterizedType.Map expr) throws RuntimeException { return typeToMatch instanceof Type.Map || typeToMatch instanceof ParameterizedType.Map; } + /** + * String literal parameterized types are not considered a match in this visitor. + * + * @param stringLiteral string literal parameterized type + * @return always {@code false} + * @throws RuntimeException never thrown in current implementation + */ @Override public Boolean visit(ParameterizedType.StringLiteral stringLiteral) throws RuntimeException { return false; diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/ListSqlOperatorFunctions.java b/isthmus/src/main/java/io/substrait/isthmus/expression/ListSqlOperatorFunctions.java index 429e90c8b..6b91746cd 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/ListSqlOperatorFunctions.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/ListSqlOperatorFunctions.java @@ -10,8 +10,29 @@ import org.apache.calcite.sql.fun.SqlMultisetSetOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +/** + * Utility class for listing all standard Calcite {@link SqlOperator} functions defined in {@link + * SqlStdOperatorTable}, excluding set operators and multiset operators. + * + *

<p>The {@code main} method uses reflection to: + *
<ul> + *
<li>Collect all public static fields of type {@link SqlOperator} + *
<li>Filter out {@link SqlSetOperator} and {@link SqlMultisetSetOperator} + *
<li>Print the operator names and total count to standard output + *
</ul> + * + *
<p>This is primarily intended for debugging or inspection of available SQL operators. + */ public class ListSqlOperatorFunctions { + /** + * Entry point for listing Calcite SQL operators. + * + *

Prints all operator names and their total count to standard output. + * + * @param args command-line arguments (not used) + */ public static void main(String[] args) { Map operators = Arrays.stream(SqlStdOperatorTable.class.getFields()) @@ -35,6 +56,13 @@ public static void main(String[] args) { System.out.println("Operator count: " + operators.size()); } + /** + * Retrieves the {@link SqlOperator} instance from a given {@link Field}. + * + * @param f the field representing a Calcite SQL operator + * @return the {@link SqlOperator} instance + * @throws IllegalStateException if the field cannot be accessed + */ private static SqlOperator toOp(Field f) { try { return (SqlOperator) f.get(null); diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/LiteralConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/LiteralConverter.java index 02cb8a116..f8c29fa8a 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/LiteralConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/LiteralConverter.java @@ -26,6 +26,13 @@ import org.apache.calcite.util.TimeString; import org.apache.calcite.util.TimestampString; +/** + * Converts Calcite {@link RexLiteral} values to Substrait {@link Expression.Literal}, using {@link + * TypeConverter} for type resolution. + * + *

<p>Supports numeric, boolean, character, binary, temporal, interval, ROW/ARRAY, and selected + * symbols/enums. Throws {@link UnsupportedOperationException} for unsupported types. + */ public class LiteralConverter { // TODO: Handle conversion of user-defined type literals @@ -50,6 +57,11 @@ public class LiteralConverter { private final TypeConverter typeConverter; + /** + * Creates a converter that uses the given {@link TypeConverter}. + * + * @param typeConverter converter for {@link RelDataType} to Substrait {@link Type} + */ public LiteralConverter(TypeConverter typeConverter) { this.typeConverter = typeConverter; } @@ -66,6 +78,16 @@ private static BigDecimal bd(RexLiteral literal) { return (BigDecimal) literal.getValue(); } + /** + * Converts a Calcite {@link RexLiteral} to a Substrait {@link Expression.Literal}. + * + *

<p>Type conversion is performed first to ensure value compatibility. Null literals return a + * typed NULL. Unsupported cases throw an exception. + * + * @param literal the Calcite literal to convert + * @return the corresponding Substrait literal + * @throws UnsupportedOperationException if the literal type/value cannot be handled + */ public Expression.Literal convert(RexLiteral literal) { // convert type first to guarantee we can handle the value. final Type type = typeConverter.toSubstrait(literal.getType()); @@ -215,11 +237,28 @@ public Expression.Literal convert(RexLiteral literal) { } } + /** + * Pads a Calcite {@link org.apache.calcite.avatica.util.ByteString} right with zeros to the + * expected length if needed. + * + * @param bytes the Calcite {@code ByteString} value + * @param length the expected fixed length + * @return a new byte array of {@code length} with original bytes and trailing zeros if needed + * @throws IllegalArgumentException if {@code length} is less than {@code bytes.length()} + */ public static byte[] padRightIfNeeded( org.apache.calcite.avatica.util.ByteString bytes, int length) { return padRightIfNeeded(bytes.getBytes(), length); } + /** + * Pads a byte array right with zeros to the expected length if needed.
+ * + * @param value the byte array value + * @param length the expected fixed length + * @return a new byte array of {@code length} with original bytes and trailing zeros if needed + * @throws IllegalArgumentException if {@code length} is less than {@code value.length} + */ public static byte[] padRightIfNeeded(byte[] value, int length) { if (length < value.length) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/RexExpressionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/RexExpressionConverter.java index 6993c8451..1a16a830e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/RexExpressionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/RexExpressionConverter.java @@ -32,6 +32,13 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +/** + * Converts Calcite {@link RexNode} trees to Substrait {@link Expression}s. + * + *

Delegates function calls to registered {@link CallConverter}s and supports window function + * conversion via {@link WindowFunctionConverter}. Some Rex node kinds are intentionally unsupported + * and will throw {@link UnsupportedOperationException}. + */ public class RexExpressionConverter implements RexVisitor { private final List callConverters; @@ -39,14 +46,34 @@ public class RexExpressionConverter implements RexVisitor { private final TypeConverter typeConverter; private WindowFunctionConverter windowFunctionConverter; + /** + * Creates a converter with an explicit {@link SubstraitRelVisitor} and one or more call + * converters. + * + * @param relVisitor visitor used to convert subqueries/relations + * @param callConverters converters for Rex calls + */ public RexExpressionConverter(SubstraitRelVisitor relVisitor, CallConverter... callConverters) { this(relVisitor, Arrays.asList(callConverters), null, TypeConverter.DEFAULT); } + /** + * Creates a converter with the given call converters and default {@link TypeConverter}. + * + * @param callConverters converters for Rex calls + */ public RexExpressionConverter(CallConverter... callConverters) { this(null, Arrays.asList(callConverters), null, TypeConverter.DEFAULT); } + /** + * Creates a converter with full configuration. + * + * @param relVisitor visitor used to convert subqueries/relations; may be {@code null} + * @param callConverters converters for Rex calls + * @param windowFunctionConverter converter for window functions; may be {@code null} + * @param typeConverter converter from Calcite types to Substrait types + */ public RexExpressionConverter( SubstraitRelVisitor relVisitor, List callConverters, @@ -59,19 +86,34 @@ public RexExpressionConverter( } /** - * Only used for testing. Missing `ScalarFunctionConverter`, `CallConverters.CREATE_SEARCH_CONV` + * Testing-only constructor that wires default converters. + * + *

Missing {@code ScalarFunctionConverter} and {@code CallConverters.CREATE_SEARCH_CONV}. */ public RexExpressionConverter() { this(null, CallConverters.defaults(TypeConverter.DEFAULT), null, TypeConverter.DEFAULT); // TODO: Hide this AND/OR UPDATE tests } + /** + * Converts a {@link RexInputRef} to a root struct field reference. + * + * @param inputRef the input reference + * @return a Substrait field reference expression + */ @Override public Expression visitInputRef(RexInputRef inputRef) { return FieldReference.newRootStructReference( inputRef.getIndex(), typeConverter.toSubstrait(inputRef.getType())); } + /** + * Converts a {@link RexCall} using registered {@link CallConverter}s. + * + * @param call the Rex call node + * @return the converted Substrait expression + * @throws IllegalArgumentException if no converter can handle the call + */ @Override public Expression visitCall(RexCall call) { for (CallConverter c : callConverters) { @@ -84,6 +126,12 @@ public Expression visitCall(RexCall call) { throw new IllegalArgumentException(callConversionFailureMessage(call)); } + /** + * Builds a concise failure message for an unsupported call conversion. + * + * @param call the Rex call node + * @return a human-readable message describing the failure + */ private String callConversionFailureMessage(RexCall call) { return String.format( "Unable to convert call %s(%s).", @@ -93,11 +141,24 @@ private String callConversionFailureMessage(RexCall call) { .collect(Collectors.joining(", "))); } + /** + * Converts a {@link RexLiteral} to a Substrait literal expression. + * + * @param literal the Rex literal + * @return the converted Substrait expression + */ @Override public Expression visitLiteral(RexLiteral literal) { return (new LiteralConverter(typeConverter)).convert(literal); } + /** + * Converts a {@link RexOver} window function call. 
+ * + * @param over the windowed call + * @return the converted Substrait expression + * @throws IllegalArgumentException if {@code IGNORE NULLS} is used or conversion fails + */ @Override public Expression visitOver(RexOver over) { if (over.ignoreNulls()) { @@ -109,21 +170,49 @@ public Expression visitOver(RexOver over) { .orElseThrow(() -> new IllegalArgumentException(callConversionFailureMessage(over))); } + /** + * Not supported. + * + * @param correlVariable the correl variable + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitCorrelVariable(RexCorrelVariable correlVariable) { throw new UnsupportedOperationException("RexCorrelVariable not supported"); } + /** + * Not supported. + * + * @param dynamicParam the dynamic parameter + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitDynamicParam(RexDynamicParam dynamicParam) { throw new UnsupportedOperationException("RexDynamicParam not supported"); } + /** + * Not supported. + * + * @param rangeRef the range ref + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitRangeRef(RexRangeRef rangeRef) { throw new UnsupportedOperationException("RexRangeRef not supported"); } + /** + * Converts a {@link RexFieldAccess} to a Substrait field reference expression. + * + * @param fieldAccess the field access + * @return the converted Substrait expression + * @throws UnsupportedOperationException for unsupported reference kinds + */ @Override public Expression visitFieldAccess(RexFieldAccess fieldAccess) { SqlKind kind = fieldAccess.getReferenceExpr().getKind(); @@ -155,6 +244,13 @@ public Expression visitFieldAccess(RexFieldAccess fieldAccess) { } } + /** + * Converts a {@link RexSubQuery} into a Substrait set or scalar subquery expression. 
+ * + * @param subQuery the subquery node + * @return the converted Substrait expression + * @throws UnsupportedOperationException for unsupported subquery operators + */ @Override public Expression visitSubQuery(RexSubQuery subQuery) { Rel rel = relVisitor.apply(subQuery.rel); @@ -185,31 +281,73 @@ public Expression visitSubQuery(RexSubQuery subQuery) { throw new UnsupportedOperationException("RexSubQuery not supported"); } + /** + * Not supported. + * + * @param fieldRef the table input reference + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitTableInputRef(RexTableInputRef fieldRef) { throw new UnsupportedOperationException("RexTableInputRef not supported"); } + /** + * Not supported. + * + * @param localRef the local reference + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitLocalRef(RexLocalRef localRef) { throw new UnsupportedOperationException("RexLocalRef not supported"); } + /** + * Not supported. + * + * @param fieldRef the pattern field reference + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitPatternFieldRef(RexPatternFieldRef fieldRef) { throw new UnsupportedOperationException("RexPatternFieldRef not supported"); } + /** + * Not supported. + * + * @param rexLambda the lambda + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitLambda(RexLambda rexLambda) { throw new UnsupportedOperationException("RexLambda not supported"); } + /** + * Not supported. + * + * @param rexLambdaRef the lambda reference + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitLambdaRef(RexLambdaRef rexLambdaRef) { throw new UnsupportedOperationException("RexLambdaRef not supported"); } + /** + * Not supported. 
+ * + * @param nodeAndFieldIndex the node/field index wrapper + * @return never returns + * @throws UnsupportedOperationException always + */ @Override public Expression visitNodeAndFieldIndex(RexNodeAndFieldIndex nodeAndFieldIndex) { throw new UnsupportedOperationException("RexNodeAndFieldIndex not supported"); diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java index b3ad6514c..3025ea625 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java @@ -18,6 +18,13 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; +/** + * Converts Calcite {@link RexCall} scalar functions to Substrait {@link Expression} using known + * Substrait {@link SimpleExtension.ScalarFunctionVariant} declarations. + * + *

Supports custom function mappers for special cases (e.g., TRIM, SQRT), and falls back to + * default signature-based matching. Produces {@link Expression.ScalarFunctionInvocation}. + */ public class ScalarFunctionConverter extends FunctionConverter< SimpleExtension.ScalarFunctionVariant, @@ -31,11 +38,25 @@ public class ScalarFunctionConverter */ private final List mappers; + /** + * Creates a converter with the given functions and type factory. + * + * @param functions available Substrait scalar function variants + * @param typeFactory Calcite type factory for type conversions + */ public ScalarFunctionConverter( List functions, RelDataTypeFactory typeFactory) { this(functions, Collections.emptyList(), typeFactory, TypeConverter.DEFAULT); } + /** + * Creates a converter with additional signatures and a custom type converter. + * + * @param functions available Substrait scalar function variants + * @param additionalSignatures extra Calcite-to-Substrait signature mappings + * @param typeFactory Calcite type factory for type conversions + * @param typeConverter converter for Calcite {@link RelDataType} to Substrait {@link Type} + */ public ScalarFunctionConverter( List functions, List additionalSignatures, @@ -46,11 +67,24 @@ public ScalarFunctionConverter( mappers = List.of(new TrimFunctionMapper(functions), new SqrtFunctionMapper(functions)); } + /** + * Returns the set of known scalar function signatures. + * + * @return immutable list of scalar signatures + */ @Override protected ImmutableList getSigs() { return FunctionMappings.SCALAR_SIGS; } + /** + * Converts a {@link RexCall} into a Substrait {@link Expression}, applying any registered custom + * mapping first, then default matching if needed. 
+ * + * @param call the Calcite function call to convert + * @param topLevelConverter converter for nested operands + * @return the converted expression if a match is found; otherwise {@link Optional#empty()} + */ @Override public Optional convert( RexCall call, Function topLevelConverter) { @@ -108,6 +142,15 @@ private boolean isPotentialFunctionMatch(FunctionFinder finder, WrappedScalarCal return Objects.nonNull(finder) && finder.allowedArgCount((int) call.getOperands().count()); } + /** + * Builds an {@link Expression.ScalarFunctionInvocation} for a matched function. + * + * @param call the wrapped Calcite call providing operands and type + * @param function the Substrait scalar function declaration to invoke + * @param arguments converted argument list for the invocation + * @param outputType the Substrait output type for the invocation + * @return a scalar function invocation expression + */ @Override protected Expression generateBinding( WrappedScalarCall call, @@ -121,6 +164,13 @@ protected Expression generateBinding( .build(); } + /** + * Returns the Substrait arguments for a given scalar invocation, applying any custom mapping if + * present; otherwise returns the invocation's own arguments. + * + * @param expression the scalar function invocation + * @return the argument list, possibly remapped; never {@code null} + */ public List getExpressionArguments(Expression.ScalarFunctionInvocation expression) { // If a mapping applies to this expression, use it to get the arguments; otherwise default // behavior. @@ -136,6 +186,11 @@ private Optional> getMappedExpressionArguments( .orElse(Optional.empty()); } + /** + * Wrapped view of a {@link RexCall} for signature matching. + * + *

Provides operand stream and type info used by {@link FunctionFinder}. + */ protected static class WrappedScalarCall implements FunctionConverter.GenericCall { private final RexCall delegate; @@ -144,11 +199,21 @@ private WrappedScalarCall(RexCall delegate) { this.delegate = delegate; } + /** + * Returns the operand stream of the underlying {@link RexCall}. + * + * @return stream of operands + */ @Override public Stream getOperands() { return delegate.getOperands().stream(); } + /** + * Returns the Calcite type of the underlying {@link RexCall}. + * + * @return call type + */ @Override public RelDataType getType() { return delegate.getType(); diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/SortFieldConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/SortFieldConverter.java index 773632164..5c458f18b 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/SortFieldConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/SortFieldConverter.java @@ -5,9 +5,22 @@ import org.apache.calcite.rel.RelFieldCollation.Direction; import org.apache.calcite.rex.RexFieldCollation; +/** + * Utility for converting Calcite {@link RexFieldCollation} objects into Substrait {@link + * Expression.SortField} representations. + * + *

Handles sort direction and null ordering. + */ public class SortFieldConverter { - /** Converts a {@link RexFieldCollation} to a {@link Expression.SortField}. */ + /** + * Converts a Calcite {@link RexFieldCollation} to a Substrait {@link Expression.SortField}. + * + * @param rexFieldCollation The Calcite field collation to convert. + * @param rexExpressionConverter Converter for translating the field expression. + * @return A Substrait {@link Expression.SortField} with the appropriate direction and expression. + * @throws IllegalArgumentException if the collation direction is unsupported. + */ public static Expression.SortField toSortField( RexFieldCollation rexFieldCollation, RexExpressionConverter rexExpressionConverter) { Expression expr = rexFieldCollation.left.accept(rexExpressionConverter); @@ -16,6 +29,13 @@ public static Expression.SortField toSortField( return Expression.SortField.builder().expr(expr).direction(direction).build(); } + /** + * Determines the Substrait {@link Expression.SortDirection} based on Calcite collation details. + * + * @param collation The Calcite {@link RexFieldCollation}. + * @return The corresponding Substrait sort direction. + * @throws IllegalArgumentException if the direction is not ASCENDING or DESCENDING. 
+ */ private static Expression.SortDirection asSortDirection(RexFieldCollation collation) { RelFieldCollation.Direction direction = collation.getDirection(); diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/SqlArrayValueConstructorCallConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/SqlArrayValueConstructorCallConverter.java index c8805a901..3724b14f7 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/SqlArrayValueConstructorCallConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/SqlArrayValueConstructorCallConverter.java @@ -15,14 +15,36 @@ import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlArrayValueConstructor; +/** Converts Calcite {@link SqlArrayValueConstructor} calls into Substrait list literals. */ public class SqlArrayValueConstructorCallConverter implements CallConverter { private final TypeConverter typeConverter; + /** + * Creates a converter for array value constructors using the supplied {@link TypeConverter}. + * + * @param typeConverter Converter for Calcite element types to Substrait {@link Type}. + */ public SqlArrayValueConstructorCallConverter(TypeConverter typeConverter) { this.typeConverter = typeConverter; } + /** + * Attempts to convert a Calcite {@link RexCall} of {@link SqlArrayValueConstructor} into a + * Substrait list expression. + * + *

Empty arrays are converted using {@link ExpressionCreator#emptyList(boolean, Type)} based on + * the element type. Non-empty arrays are converted to a list of literals if all operands are + * {@link Expression.Literal}, otherwise to a {@link Expression.NestedList}. + * + * @param call The Calcite array constructor call. + * @param topLevelConverter Function converting {@link RexNode} operands to Substrait {@link + * Expression}s. + * @return An {@link Optional} containing the converted {@link Expression} if the operator is + * {@link SqlArrayValueConstructor}; otherwise {@link Optional#empty()}. + * @throws ClassCastException if non-empty operands are converted by {@code topLevelConverter} + * into non-literal expressions when a literal list is required. + */ @Override public Optional convert( RexCall call, Function topLevelConverter) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/SqlMapValueConstructorCallConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/SqlMapValueConstructorCallConverter.java index 8cf4958d8..f44b9aed4 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/SqlMapValueConstructorCallConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/SqlMapValueConstructorCallConverter.java @@ -13,10 +13,29 @@ import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlMapValueConstructor; +/** + * Converts Calcite {@link SqlMapValueConstructor} calls into Substrait map literals. + * + *

Expects an even-numbered operand list (key/value pairs) and produces an {@link Expression} map + * literal via {@link ExpressionCreator}. + */ public class SqlMapValueConstructorCallConverter implements CallConverter { SqlMapValueConstructorCallConverter() {} + /** + * Attempts to convert a Calcite {@link RexCall} representing a {@link SqlMapValueConstructor} + * into a Substrait map literal. + * + * @param call The Calcite call to convert. + * @param topLevelConverter Function for converting {@link RexNode} operands to Substrait {@link + * Expression}s. + * @return An {@link Optional} containing the converted {@link Expression} if the operator is a + * {@link SqlMapValueConstructor}; otherwise {@link Optional#empty()}. + * @throws ClassCastException if operands converted by {@code topLevelConverter} are not {@link + * Expression.Literal} instances. + * @throws AssertionError if the number of operands is not even (expecting key/value pairs). + */ @Override public Optional convert( RexCall call, Function topLevelConverter) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/WindowBoundConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/WindowBoundConverter.java index 8979b0d26..2d86cf34e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/WindowBoundConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/WindowBoundConverter.java @@ -7,9 +7,33 @@ import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.sql.type.SqlTypeName; +/** + * Utility for converting Calcite {@link RexWindowBound} to Substrait {@link WindowBound}. + * + *

Supports {@code CURRENT ROW}, {@code UNBOUNDED}, and integer-offset {@code PRECEDING}/{@code + * FOLLOWING} bounds. + */ public class WindowBoundConverter { - /** Converts a {@link RexWindowBound} to a {@link WindowBound}. */ + /** + * Converts a Calcite {@link RexWindowBound} to a Substrait {@link WindowBound}. + * + *

Accepted forms: + * + *

<ul> + *
<li>{@code CURRENT ROW} → {@link WindowBound#CURRENT_ROW} + *
<li>{@code UNBOUNDED} → {@link WindowBound#UNBOUNDED} + *
<li>{@code n PRECEDING} / {@code n FOLLOWING} where {@code n} is an exact integer → {@link + * WindowBound.Preceding}/{@link WindowBound.Following} + * </ul>
+ * + * @param rexWindowBound The Calcite window bound to convert. + * @return The corresponding Substrait {@link WindowBound}. + * @throws IllegalStateException if the bound is not one of CURRENT ROW, UNBOUNDED, PRECEDING, or + * FOLLOWING. + * @throws IllegalArgumentException if the offset is not an exact integer type supported by + * Substrait. + */ public static WindowBound toWindowBound(RexWindowBound rexWindowBound) { if (rexWindowBound.isCurrentRow()) { return WindowBound.CURRENT_ROW; diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/WindowFunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/WindowFunctionConverter.java index 9e35492e3..17c651ebc 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/WindowFunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/WindowFunctionConverter.java @@ -24,22 +24,47 @@ import org.apache.calcite.rex.RexWindow; import org.apache.calcite.sql.SqlAggFunction; +/** + * Converts Calcite window function calls ({@link RexOver}) into Substrait {@link + * Expression.WindowFunctionInvocation}s using configured Substrait window function variants. + * + *

Handles partitioning, ordering, bounds (ROWS/RANGE, lower/upper), and DISTINCT/ALL invocation. + */ public class WindowFunctionConverter extends FunctionConverter< SimpleExtension.WindowFunctionVariant, Expression.WindowFunctionInvocation, WindowFunctionConverter.WrappedWindowCall> { + /** + * Returns the supported window function signatures used for matching. + * + * @return immutable list of supported signatures. + */ @Override protected ImmutableList getSigs() { return FunctionMappings.WINDOW_SIGS; } + /** + * Creates a converter with the provided window function variants. + * + * @param functions Supported Substrait window function variants. + * @param typeFactory Calcite type factory for type handling. + */ public WindowFunctionConverter( List functions, RelDataTypeFactory typeFactory) { super(functions, typeFactory); } + /** + * Creates a converter with provided function variants and additional signatures. + * + * @param functions Supported Substrait window function variants. + * @param additionalSignatures Extra signatures to consider during matching. + * @param typeFactory Calcite type factory for type handling. + * @param typeConverter Converter for Calcite/Substrait types. + */ public WindowFunctionConverter( List functions, List additionalSignatures, @@ -48,6 +73,15 @@ public WindowFunctionConverter( super(functions, additionalSignatures, typeFactory, typeConverter); } + /** + * Generates a bound Substrait window function invocation for a matched call. + * + * @param call Wrapped window call, including {@link RexOver} and expression converter. + * @param function Selected Substrait function variant. + * @param arguments Converted Substrait function arguments. + * @param outputType Result type for the invocation. + * @return Built {@link Expression.WindowFunctionInvocation}. 
+ */ @Override protected Expression.WindowFunctionInvocation generateBinding( WrappedWindowCall call, @@ -92,6 +126,19 @@ protected Expression.WindowFunctionInvocation generateBinding( arguments); } + /** + * Attempts to convert a Calcite {@link RexOver} call into a Substrait window function invocation. + * + *

Resolves the corresponding Substrait aggregate function variant, checks arity using + * signatures, and builds the invocation if match succeeds. + * + * @param over Calcite windowed aggregate call. + * @param topLevelConverter Function converting top-level {@link RexNode}s to Substrait {@link + * Expression}s. + * @param rexExpressionConverter Converter for nested {@link RexNode} expressions. + * @return {@link Optional} containing the {@link Expression.WindowFunctionInvocation} if matched; + * otherwise empty. + */ public Optional convert( RexOver over, Function topLevelConverter, diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/WindowRelFunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/WindowRelFunctionConverter.java index b1b9a201f..e75cc3ddd 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/WindowRelFunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/WindowRelFunctionConverter.java @@ -23,22 +23,48 @@ import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.sql.SqlAggFunction; +/** + * Converts Calcite window aggregate calls (from {@link Window.RexWinAggCall}) into Substrait {@link + * ConsistentPartitionWindow.WindowRelFunctionInvocation}s. + * + *

Handles bounds type (ROWS/RANGE), lower/upper bounds, DISTINCT/ALL invocation, and function + * signature matching against configured Substrait window function variants. + */ public class WindowRelFunctionConverter extends FunctionConverter< SimpleExtension.WindowFunctionVariant, ConsistentPartitionWindow.WindowRelFunctionInvocation, WindowRelFunctionConverter.WrappedWindowRelCall> { + /** + * Returns the supported window function signatures used for matching. + * + * @return immutable list of supported signatures. + */ @Override protected ImmutableList getSigs() { return FunctionMappings.WINDOW_SIGS; } + /** + * Creates a converter with the provided window function variants. + * + * @param functions Supported Substrait window function variants. + * @param typeFactory Calcite type factory for type handling. + */ public WindowRelFunctionConverter( List functions, RelDataTypeFactory typeFactory) { super(functions, typeFactory); } + /** + * Creates a converter with provided function variants and additional signatures. + * + * @param functions Supported Substrait window function variants. + * @param additionalSignatures Extra signatures to consider during matching. + * @param typeFactory Calcite type factory for type handling. + * @param typeConverter Converter for Calcite/Substrait types. + */ public WindowRelFunctionConverter( List functions, List additionalSignatures, @@ -47,6 +73,15 @@ public WindowRelFunctionConverter( super(functions, additionalSignatures, typeFactory, typeConverter); } + /** + * Generates a bound Substrait window relation function invocation for a matched call. + * + * @param call Wrapped window rel call, including bounds and the original win-agg call. + * @param function Selected Substrait function variant. + * @param arguments Converted Substrait function arguments. + * @param outputType Result type for the invocation. + * @return Built {@link ConsistentPartitionWindow.WindowRelFunctionInvocation}. 
+ */ @Override protected ConsistentPartitionWindow.WindowRelFunctionInvocation generateBinding( WrappedWindowRelCall call, @@ -77,6 +112,22 @@ protected ConsistentPartitionWindow.WindowRelFunctionInvocation generateBinding( arguments); } + /** + * Attempts to convert a Calcite {@link Window.RexWinAggCall} into a Substrait window relation + * function invocation. + * + *

Resolves the corresponding Substrait aggregate function variant, checks arity using + * signatures, and builds the invocation if match succeeds. + * + * @param winAggCall Calcite window aggregate call. + * @param lowerBound Lower bound of the window. + * @param upperBound Upper bound of the window. + * @param isRows Whether the window uses ROWS (true) or RANGE (false). + * @param topLevelConverter Function converting top-level {@link RexNode}s to Substrait {@link + * Expression}s. + * @return {@link Optional} containing the {@link + * ConsistentPartitionWindow.WindowRelFunctionInvocation} if matched; otherwise empty. + */ public Optional convert( Window.RexWinAggCall winAggCall, RexWindowBound lowerBound, @@ -127,18 +178,38 @@ public RelDataType getType() { return winAggCall.getType(); } + /** + * Returns the underlying Calcite window aggregate call. + * + * @return the {@link Window.RexWinAggCall}. + */ public Window.RexWinAggCall getWinAggCall() { return winAggCall; } + /** + * Returns the lower bound of the window. + * + * @return the {@link RexWindowBound} lower bound. + */ public RexWindowBound getLowerBound() { return lowerBound; } + /** + * Returns the upper bound of the window. + * + * @return the {@link RexWindowBound} upper bound. + */ public RexWindowBound getUpperBound() { return upperBound; } + /** + * Whether the window uses ROWS (true) or RANGE (false). + * + * @return {@code true} if ROWS; {@code false} if RANGE. 
+ */ public boolean isRows() { return isRows; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java index d497f00bd..f0b502590 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java @@ -24,6 +24,7 @@ /** Utility class for parsing CREATE statements into a {@link CalciteCatalogReader} */ public class SubstraitCreateStatementParser { + /** An empty catalog reader used for validating CREATE statements. */ public static final CalciteCatalogReader EMPTY_CATALOG = new CalciteCatalogReader( CalciteSchema.createRootSchema(false), @@ -31,21 +32,21 @@ public class SubstraitCreateStatementParser { SubstraitTypeSystem.TYPE_FACTORY, SqlConverterBase.CONNECTION_CONFIG); - // A validator is needed to convert the types in column declarations to Calcite types + /** SQL validator configured for validating CREATE statements against the empty catalog. */ public static final SqlValidator VALIDATOR = new SubstraitSqlValidator( // as we are validating CREATE statements, an empty catalog suffices EMPTY_CATALOG); /** - * Parses a SQL string containing only CREATE statements into a list of {@link SubstraitTable}s + * Parses a SQL string containing only CREATE statements into a list of {@link SubstraitTable}s. * *

This method only supports simple table names without any additional qualifiers. Only used * with {@link io.substrait.isthmus.SqlExpressionToSubstrait}. * - * @param createStatements a SQL string containing only CREATE statements, must not be null - * @return a list of {@link SubstraitTable}s generated from the CREATE statements - * @throws SqlParseException + * @param createStatements a SQL string containing only CREATE statements; must not be null + * @return list of {@link SubstraitTable}s generated from the CREATE statements + * @throws SqlParseException if parsing fails or statements are invalid */ public static List processCreateStatements(@NonNull final String createStatements) throws SqlParseException { @@ -75,13 +76,14 @@ public static List processCreateStatements(@NonNull final String /** * Parses one or more SQL strings containing only CREATE statements into a {@link - * CalciteCatalogReader} + * CalciteCatalogReader}. * *

This method expects the use of fully qualified table names in the CREATE statements. * - * @param createStatements a SQL string containing only CREATE statements, must not be null + * @param createStatements one or more SQL strings containing only CREATE statements; must not be + * null * @return a {@link CalciteCatalogReader} generated from the CREATE statements - * @throws SqlParseException + * @throws SqlParseException if parsing fails or statements are invalid */ public static CalciteCatalogReader processCreateStatementsToCatalog( @NonNull final String... createStatements) throws SqlParseException { @@ -97,9 +99,9 @@ public static CalciteCatalogReader processCreateStatementsToCatalog( /** * Creates a new {@link SqlParseException} with the given message and {@link SqlParserPos}. * - * @param message the exception message, may be null - * @param pos the position where this error occured, may be null - * @return the {@link SqlParseException} with the given message and {@link SqlParserPos} + * @param message the exception message; may be null + * @param pos the position where this error occurred; may be null + * @return a {@link SqlParseException} with the given message and position */ private static SqlParseException fail( @Nullable final String message, @Nullable final SqlParserPos pos) { @@ -109,8 +111,8 @@ private static SqlParseException fail( /** * Creates a new {@link SqlParseException} with the given message. * - * @param message the exception message, may be null - * @return the {@link SqlParseException} with the given message + * @param message the exception message; may be null + * @return a {@link SqlParseException} with the given message */ private static SqlParseException fail(@Nullable final String message) { return fail(message, SqlParserPos.ZERO); @@ -119,9 +121,10 @@ private static SqlParseException fail(@Nullable final String message) { /** * Parses one or more SQL strings containing only CREATE statements into a {@link CalciteSchema}. 
* - * @param createStatements a SQL string containing only CREATE statements, must not be null + * @param createStatements one or more SQL strings containing only CREATE statements; must not be + * null * @return a {@link CalciteSchema} generated from the CREATE statements - * @throws SqlParseException + * @throws SqlParseException if parsing fails or statements are invalid */ private static CalciteSchema processCreateStatementsToSchema( @NonNull final String... createStatements) throws SqlParseException { @@ -158,11 +161,11 @@ private static CalciteSchema processCreateStatementsToSchema( * Creates a new {@link SubstraitTable} with the given table name and the table schema from the * given {@link SqlNodeList} containing {@link SqlColumnDeclaration}s. * - * @param tableName the table name to use, must not be null - * @param columnList the {@link SqlNodeList} containing {@link SqlColumnDeclaration}s to create - * the table schema from, must not be null - * @return the {@link SubstraitTable} - * @throws SqlParseException + * @param tableName the table name to use; must not be null + * @param columnList the {@link SqlNodeList} containing {@link SqlColumnDeclaration}s to build the + * table schema from; must not be null + * @return the constructed {@link SubstraitTable} + * @throws SqlParseException if the column list contains unexpected nodes or invalid names */ private static SubstraitTable createSubstraitTable( @NonNull final String tableName, @NonNull final SqlNodeList columnList) diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlDialect.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlDialect.java index dd70db0cb..60538843c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlDialect.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlDialect.java @@ -7,16 +7,25 @@ import org.apache.calcite.sql.util.SqlString; /** - * {@link SqlDialect} used by Isthmus for parsing + * {@link SqlDialect} 
implementation used by Isthmus for SQL parsing and generation. * - *

Intended primarily for internal testing + *

Primarily intended for internal testing and conversion of Calcite {@link RelNode} trees to + * SQL. */ public class SubstraitSqlDialect extends SqlDialect { + /** Default context for the Substrait SQL dialect. */ public static SqlDialect.Context DEFAULT_CONTEXT = SqlDialect.EMPTY_CONTEXT; + /** Default instance of Substrait SQL dialect. */ public static SqlDialect DEFAULT = new SubstraitSqlDialect(DEFAULT_CONTEXT); + /** + * Converts a Calcite {@link RelNode} to its SQL representation using the default dialect. + * + * @param relNode The Calcite relational node to convert. + * @return A {@link SqlString} representing the SQL equivalent of the given {@link RelNode}. + */ public static SqlString toSql(RelNode relNode) { RelToSqlConverter relToSql = new RelToSqlConverter(DEFAULT); SqlNode sqlNode = relToSql.visitRoot(relNode).asStatement(); @@ -28,10 +37,20 @@ public static SqlString toSql(RelNode relNode) { .withIndentation(0)); } + /** + * Constructs a Substrait SQL dialect with the given context. + * + * @param context The {@link SqlDialect.Context} providing configuration for SQL generation. + */ public SubstraitSqlDialect(Context context) { super(context); } + /** + * Indicates whether this dialect supports approximate COUNT(DISTINCT) operations. + * + * @return {@code true}, as Substrait SQL dialect supports approximate count distinct. + */ @Override public boolean supportsApproxCountDistinct() { return true; diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java index 07b6edda8..b7fc0bd15 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java @@ -6,14 +6,32 @@ import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql.validate.SqlValidatorImpl; +/** + * Custom SQL validator for Substrait SQL dialect. + * + *

Uses {@link SubstraitOperatorTable} and Calcite's validation framework to validate SQL + * statements for Substrait-specific operators. + */ public class SubstraitSqlValidator extends SqlValidatorImpl { + /** Default configuration for the validator with identifier expansion enabled. */ static SqlValidator.Config CONFIG = Config.DEFAULT.withIdentifierExpansion(true); + /** + * Creates a Substrait SQL validator using the default operator table. + * + * @param catalogReader The {@link Prepare.CatalogReader} providing schema and type information. + */ public SubstraitSqlValidator(Prepare.CatalogReader catalogReader) { super(SubstraitOperatorTable.INSTANCE, catalogReader, catalogReader.getTypeFactory(), CONFIG); } + /** + * Creates a Substrait SQL validator using a custom operator table. + * + * @param catalogReader The {@link Prepare.CatalogReader} providing schema and type information. + * @param opTable The {@link SqlOperatorTable} containing SQL operators for validation. + */ public SubstraitSqlValidator(Prepare.CatalogReader catalogReader, SqlOperatorTable opTable) { super(opTable, catalogReader, catalogReader.getTypeFactory(), CONFIG); }