From 58857df6adde4d50e1e1b388dd72f6b674768af0 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 27 May 2026 12:20:57 +0200 Subject: [PATCH 1/6] GH-3596: Add RowRanges.Builder for incremental construction from selected row indices ### Rationale for this change Opening up APIs needed by a later materialization feature in Spark. External readers need to assemble a RowRanges incrementally from a stream of selected row indices (e.g. produced by a downstream filter or join) without having to know page boundaries ahead of time. ### What changes are included in this PR? Adds a Builder to RowRanges that takes a strictly-increasing sequence of selected row indices via addSelected(long) and coalesces consecutive indices into Range entries. Out-of-order or duplicate calls throw IllegalArgumentException. ### Are these changes tested? Yes. TestRowRanges covers single/multiple/coalesced ranges, the empty builder case, and the out-of-order/duplicate rejection paths. ### Are there any user-facing changes? No. Closes #3596 Co-authored-by: Matt Butrovich --- .../filter2/columnindex/RowRanges.java | 68 ++++++++++++ .../filter2/columnindex/TestRowRanges.java | 102 ++++++++++++++++++ 2 files changed, 170 insertions(+) diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java index 0b2257a6bc..eb3b2abdd2 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java @@ -316,4 +316,72 @@ public List getRanges() { public String toString() { return ranges.toString(); } + + /** + * @return a new {@link Builder} for constructing a {@link RowRanges} from a sequence of + * selected row indices. 
+ */ + public static Builder builder() { + return new Builder(); + } + + /** + * Constructs a {@link RowRanges} by appending selected row indices in strictly increasing + * order. Consecutive indices are coalesced into a single {@link Range}; gaps close the + * current run and start a new one. + * + *

<p>Usage: + * <pre>{@code
+   * RowRanges.Builder builder = RowRanges.builder();
+   * for (long row : selectedRowsInOrder) {
+   *   builder.addSelected(row);
+   * }
+   * RowRanges ranges = builder.build();
+   * }</pre>
+ */ + public static class Builder { + private final List ranges = new ArrayList<>(); + private long runStart = -1; // -1 = no active run + private long runEnd = -1; // valid iff runStart >= 0 + + /** + * Marks {@code blockRow} as selected. Must be called in strictly increasing order; calling + * with a value less than or equal to the previous call's value throws + * {@link IllegalArgumentException}. + * + * @param blockRow the row index to mark selected (must be {@code >} the last value passed) + * @return this builder for chaining + */ + public Builder addSelected(long blockRow) { + if (runStart < 0) { + runStart = blockRow; + runEnd = blockRow; + } else if (blockRow == runEnd + 1) { + runEnd = blockRow; + } else if (blockRow > runEnd + 1) { + ranges.add(new Range(runStart, runEnd)); + runStart = blockRow; + runEnd = blockRow; + } else { + throw new IllegalArgumentException( + "addSelected requires strictly increasing row indices; got " + blockRow + " after " + runEnd); + } + return this; + } + + /** + * @return the constructed {@link RowRanges}, or {@link RowRanges#EMPTY} when no rows were + * selected. 
+ */ + public RowRanges build() { + if (runStart >= 0) { + ranges.add(new Range(runStart, runEnd)); + runStart = -1; + } + if (ranges.isEmpty()) { + return RowRanges.EMPTY; + } + return new RowRanges(ranges); + } + } } diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java index 9c6b9f737c..34cd0f8203 100644 --- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java +++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java @@ -152,4 +152,106 @@ public void testIntersection() { assertAllRowsEqual(intersection(empty, ranges2).iterator()); assertAllRowsEqual(intersection(empty, empty).iterator()); } + + @Test + public void testBuilderBasic() { + // Select rows 2, 3, 4, 5 (one contiguous run) + RowRanges ranges = RowRanges.builder() + .addSelected(2) + .addSelected(3) + .addSelected(4) + .addSelected(5) + .build(); + assertAllRowsEqual(ranges.iterator(), 2, 3, 4, 5); + assertEquals(4, ranges.rowCount()); + } + + @Test + public void testBuilderMultipleRanges() { + // Two runs: 1-2 and 5-7 + RowRanges ranges = RowRanges.builder() + .addSelected(1) + .addSelected(2) + .addSelected(5) + .addSelected(6) + .addSelected(7) + .build(); + assertAllRowsEqual(ranges.iterator(), 1, 2, 5, 6, 7); + assertEquals(5, ranges.rowCount()); + assertTrue(ranges.isOverlapping(1, 2)); + assertTrue(ranges.isOverlapping(5, 7)); + assertFalse(ranges.isOverlapping(3, 4)); + } + + @Test + public void testBuilderEmpty() { + // No rows selected + RowRanges ranges = RowRanges.builder().build(); + assertEquals(RowRanges.EMPTY, ranges); + assertEquals(0, ranges.rowCount()); + assertAllRowsEqual(ranges.iterator()); + } + + @Test + public void testBuilderAllSelected() { + // Five contiguous rows starting at 0 + RowRanges.Builder builder = RowRanges.builder(); + for 
(long i = 0; i < 5; i++) { + builder.addSelected(i); + } + RowRanges ranges = builder.build(); + assertAllRowsEqual(ranges.iterator(), 0, 1, 2, 3, 4); + assertEquals(5, ranges.rowCount()); + } + + @Test + public void testBuilderSingleRow() { + RowRanges ranges = RowRanges.builder().addSelected(3).build(); + assertAllRowsEqual(ranges.iterator(), 3); + assertEquals(1, ranges.rowCount()); + assertTrue(ranges.isOverlapping(3, 3)); + assertFalse(ranges.isOverlapping(0, 2)); + assertFalse(ranges.isOverlapping(4, 10)); + } + + @Test + public void testBuilderAlternating() { + // Every other row selected: 0, 2, 4, 6, 8 — five singleton runs. + RowRanges.Builder builder = RowRanges.builder(); + for (long i = 0; i < 10; i += 2) { + builder.addSelected(i); + } + RowRanges ranges = builder.build(); + assertAllRowsEqual(ranges.iterator(), 0, 2, 4, 6, 8); + assertEquals(5, ranges.rowCount()); + } + + @Test + public void testBuilderFirstAndLast() { + RowRanges ranges = RowRanges.builder().addSelected(0).addSelected(99).build(); + assertAllRowsEqual(ranges.iterator(), 0, 99); + assertEquals(2, ranges.rowCount()); + } + + @Test + public void testBuilderRejectsOutOfOrder() { + RowRanges.Builder builder = RowRanges.builder().addSelected(5).addSelected(7); + try { + builder.addSelected(6); + org.junit.Assert.fail("expected IllegalArgumentException for out-of-order index"); + } catch (IllegalArgumentException expected) { + // expected + } + } + + @Test + public void testBuilderRejectsDuplicate() { + RowRanges.Builder builder = RowRanges.builder().addSelected(3); + try { + builder.addSelected(3); + org.junit.Assert.fail("expected IllegalArgumentException for duplicate index"); + } catch (IllegalArgumentException expected) { + // expected + } + } } From 17cce6912afcc3836d96f703fae0462b67b5dff2 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 30 Jun 2026 14:42:56 +0200 Subject: [PATCH 2/6] address review comments --- .../filter2/columnindex/RowRanges.java | 49 ++++++++------ 
.../filter2/columnindex/TestRowRanges.java | 66 ++++++++++++++----- 2 files changed, 78 insertions(+), 37 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java index eb3b2abdd2..e3c388c17a 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java @@ -334,54 +334,63 @@ public static Builder builder() { *
<pre>{@code
    * RowRanges.Builder builder = RowRanges.builder();
    * for (long row : selectedRowsInOrder) {
-   *   builder.addSelected(row);
+   *   builder.addSelectedRow(row);
    * }
    * RowRanges ranges = builder.build();
    * }</pre>
*/ - public static class Builder { + public static final class Builder { private final List ranges = new ArrayList<>(); private long runStart = -1; // -1 = no active run private long runEnd = -1; // valid iff runStart >= 0 + private Builder() {} + /** - * Marks {@code blockRow} as selected. Must be called in strictly increasing order; calling - * with a value less than or equal to the previous call's value throws - * {@link IllegalArgumentException}. + * Marks {@code rowIndex} as selected. The value is a 0-based row index within the current row + * group. Must be called in strictly increasing order; calling with a value less than or equal + * to the previous call's value throws {@link IllegalArgumentException}. * - * @param blockRow the row index to mark selected (must be {@code >} the last value passed) + * @param rowIndex the 0-based row index to mark selected (must be {@code >} the last value + * passed and non-negative) * @return this builder for chaining */ - public Builder addSelected(long blockRow) { + public Builder addSelectedRow(long rowIndex) { + if (rowIndex < 0) { + throw new IllegalArgumentException("addSelectedRow requires a non-negative row index; got " + rowIndex); + } if (runStart < 0) { - runStart = blockRow; - runEnd = blockRow; - } else if (blockRow == runEnd + 1) { - runEnd = blockRow; - } else if (blockRow > runEnd + 1) { + runStart = rowIndex; + runEnd = rowIndex; + } else if (rowIndex == runEnd + 1) { + runEnd = rowIndex; + } else if (rowIndex > runEnd + 1) { ranges.add(new Range(runStart, runEnd)); - runStart = blockRow; - runEnd = blockRow; + runStart = rowIndex; + runEnd = rowIndex; } else { - throw new IllegalArgumentException( - "addSelected requires strictly increasing row indices; got " + blockRow + " after " + runEnd); + throw new IllegalArgumentException("addSelectedRow requires strictly increasing row indices; got " + + rowIndex + " after " + runEnd); } return this; } /** + * Returns a snapshot of the rows selected so far. 
The returned {@link RowRanges} is independent + * of this builder, so the builder may continue to be used afterwards without affecting it. + * * @return the constructed {@link RowRanges}, or {@link RowRanges#EMPTY} when no rows were * selected. */ public RowRanges build() { + List snapshot = new ArrayList<>(ranges); if (runStart >= 0) { - ranges.add(new Range(runStart, runEnd)); - runStart = -1; + snapshot.add(new Range(runStart, runEnd)); } - if (ranges.isEmpty()) { + if (snapshot.isEmpty()) { return RowRanges.EMPTY; } - return new RowRanges(ranges); + return new RowRanges(snapshot); } } } diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java index 34cd0f8203..585fc84c74 100644 --- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java +++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java @@ -157,10 +157,10 @@ public void testIntersection() { public void testBuilderBasic() { // Select rows 2, 3, 4, 5 (one contiguous run) RowRanges ranges = RowRanges.builder() - .addSelected(2) - .addSelected(3) - .addSelected(4) - .addSelected(5) + .addSelectedRow(2) + .addSelectedRow(3) + .addSelectedRow(4) + .addSelectedRow(5) .build(); assertAllRowsEqual(ranges.iterator(), 2, 3, 4, 5); assertEquals(4, ranges.rowCount()); @@ -170,11 +170,11 @@ public void testBuilderBasic() { public void testBuilderMultipleRanges() { // Two runs: 1-2 and 5-7 RowRanges ranges = RowRanges.builder() - .addSelected(1) - .addSelected(2) - .addSelected(5) - .addSelected(6) - .addSelected(7) + .addSelectedRow(1) + .addSelectedRow(2) + .addSelectedRow(5) + .addSelectedRow(6) + .addSelectedRow(7) .build(); assertAllRowsEqual(ranges.iterator(), 1, 2, 5, 6, 7); assertEquals(5, ranges.rowCount()); @@ -197,7 +197,7 @@ public void testBuilderAllSelected() { // Five 
contiguous rows starting at 0 RowRanges.Builder builder = RowRanges.builder(); for (long i = 0; i < 5; i++) { - builder.addSelected(i); + builder.addSelectedRow(i); } RowRanges ranges = builder.build(); assertAllRowsEqual(ranges.iterator(), 0, 1, 2, 3, 4); @@ -206,7 +206,7 @@ public void testBuilderAllSelected() { @Test public void testBuilderSingleRow() { - RowRanges ranges = RowRanges.builder().addSelected(3).build(); + RowRanges ranges = RowRanges.builder().addSelectedRow(3).build(); assertAllRowsEqual(ranges.iterator(), 3); assertEquals(1, ranges.rowCount()); assertTrue(ranges.isOverlapping(3, 3)); @@ -219,7 +219,7 @@ public void testBuilderAlternating() { // Every other row selected: 0, 2, 4, 6, 8 — five singleton runs. RowRanges.Builder builder = RowRanges.builder(); for (long i = 0; i < 10; i += 2) { - builder.addSelected(i); + builder.addSelectedRow(i); } RowRanges ranges = builder.build(); assertAllRowsEqual(ranges.iterator(), 0, 2, 4, 6, 8); @@ -228,16 +228,17 @@ public void testBuilderAlternating() { @Test public void testBuilderFirstAndLast() { - RowRanges ranges = RowRanges.builder().addSelected(0).addSelected(99).build(); + RowRanges ranges = + RowRanges.builder().addSelectedRow(0).addSelectedRow(99).build(); assertAllRowsEqual(ranges.iterator(), 0, 99); assertEquals(2, ranges.rowCount()); } @Test public void testBuilderRejectsOutOfOrder() { - RowRanges.Builder builder = RowRanges.builder().addSelected(5).addSelected(7); + RowRanges.Builder builder = RowRanges.builder().addSelectedRow(5).addSelectedRow(7); try { - builder.addSelected(6); + builder.addSelectedRow(6); org.junit.Assert.fail("expected IllegalArgumentException for out-of-order index"); } catch (IllegalArgumentException expected) { // expected @@ -246,12 +247,43 @@ public void testBuilderRejectsOutOfOrder() { @Test public void testBuilderRejectsDuplicate() { - RowRanges.Builder builder = RowRanges.builder().addSelected(3); + RowRanges.Builder builder = RowRanges.builder().addSelectedRow(3); 
try { - builder.addSelected(3); + builder.addSelectedRow(3); org.junit.Assert.fail("expected IllegalArgumentException for duplicate index"); } catch (IllegalArgumentException expected) { // expected } } + + @Test + public void testBuilderRejectsNegativeRow() { + RowRanges.Builder builder = RowRanges.builder(); + try { + builder.addSelectedRow(-1); + org.junit.Assert.fail("expected IllegalArgumentException for negative index"); + } catch (IllegalArgumentException expected) { + // expected + } + } + + @Test + public void testBuilderBuildReturnsSnapshot() { + // build() must return a snapshot: continuing to use the builder afterwards must not + // mutate a previously built result. + RowRanges.Builder builder = RowRanges.builder().addSelectedRow(0).addSelectedRow(1); + RowRanges first = builder.build(); + assertAllRowsEqual(first.iterator(), 0, 1); + assertEquals(2, first.rowCount()); + + builder.addSelectedRow(5); + RowRanges second = builder.build(); + + // The first result is unchanged. + assertAllRowsEqual(first.iterator(), 0, 1); + assertEquals(2, first.rowCount()); + // The second result reflects the additional row. 
+ assertAllRowsEqual(second.iterator(), 0, 1, 5); + assertEquals(3, second.rowCount()); + } } From 8d8e2dd71d51a639cd61f27b9e340b3937bc3f51 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 30 Jun 2026 17:04:48 +0200 Subject: [PATCH 3/6] move RowRanges --- .../filter2/columnindex/RowRanges.java | 5 +++-- .../filter2/columnindex/ColumnIndexFilter.java | 1 + .../filter2/columnindex/TestRowRanges.java | 6 +++--- .../filter2/columnindex/TestColumnIndexFilter.java | 1 + .../parquet/hadoop/ColumnChunkPageReadStore.java | 2 +- .../parquet/hadoop/ColumnIndexFilterUtils.java | 2 +- .../org/apache/parquet/hadoop/ParquetFileReader.java | 2 +- pom.xml | 12 ++++++++++++ 8 files changed, 23 insertions(+), 8 deletions(-) rename parquet-column/src/main/java/org/apache/parquet/{internal => }/filter2/columnindex/RowRanges.java (98%) rename parquet-column/src/test/java/org/apache/parquet/{internal => }/filter2/columnindex/TestRowRanges.java (97%) diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/filter2/columnindex/RowRanges.java similarity index 98% rename from parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java rename to parquet-column/src/main/java/org/apache/parquet/filter2/columnindex/RowRanges.java index e3c388c17a..4cb0d3c711 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java +++ b/parquet-column/src/main/java/org/apache/parquet/filter2/columnindex/RowRanges.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.parquet.internal.filter2.columnindex; +package org.apache.parquet.filter2.columnindex; import java.util.ArrayList; import java.util.Collections; @@ -33,7 +33,8 @@ * filtering. 
To be used iterate over the matching row indexes to be read from a row-group, retrieve the count of the * matching rows or check overlapping of a row index range. * - * @see ColumnIndexFilter#calculateRowRanges(Filter, ColumnIndexStore, Set, long) + * @see org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter#calculateRowRanges(Filter, + * org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore, Set, long) */ public class RowRanges { // Make it public because some uppler layer application need to access it diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java index fd26e54d7f..e58b258157 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java @@ -21,6 +21,7 @@ import java.util.PrimitiveIterator; import java.util.Set; import java.util.function.Function; +import org.apache.parquet.filter2.columnindex.RowRanges; import org.apache.parquet.filter2.compat.FilterCompat; import org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat; import org.apache.parquet.filter2.compat.FilterCompat.NoOpFilter; diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java b/parquet-column/src/test/java/org/apache/parquet/filter2/columnindex/TestRowRanges.java similarity index 97% rename from parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java rename to parquet-column/src/test/java/org/apache/parquet/filter2/columnindex/TestRowRanges.java index 585fc84c74..c977eede24 100644 --- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java +++ 
b/parquet-column/src/test/java/org/apache/parquet/filter2/columnindex/TestRowRanges.java @@ -16,10 +16,10 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.parquet.internal.filter2.columnindex; +package org.apache.parquet.filter2.columnindex; -import static org.apache.parquet.internal.filter2.columnindex.RowRanges.intersection; -import static org.apache.parquet.internal.filter2.columnindex.RowRanges.union; +import static org.apache.parquet.filter2.columnindex.RowRanges.intersection; +import static org.apache.parquet.filter2.columnindex.RowRanges.union; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java index 1574ce2474..59ec31a822 100644 --- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java +++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java @@ -61,6 +61,7 @@ import java.util.Set; import java.util.stream.LongStream; import org.apache.parquet.bytes.BytesUtils; +import org.apache.parquet.filter2.columnindex.RowRanges; import org.apache.parquet.filter2.compat.FilterCompat; import org.apache.parquet.filter2.predicate.Statistics; import org.apache.parquet.filter2.predicate.UserDefinedPredicate; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java index c7fc22b29f..265f65d11a 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java @@ -42,9 
+42,9 @@ import org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor; import org.apache.parquet.crypto.AesCipher; import org.apache.parquet.crypto.ModuleCipherFactory.ModuleType; +import org.apache.parquet.filter2.columnindex.RowRanges; import org.apache.parquet.format.BlockCipher; import org.apache.parquet.internal.column.columnindex.OffsetIndex; -import org.apache.parquet.internal.filter2.columnindex.RowRanges; import org.apache.parquet.io.ParquetDecodingException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java index e783815747..9c9e2a6cfa 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java @@ -28,9 +28,9 @@ import java.util.Formatter; import java.util.List; import java.util.Optional; +import org.apache.parquet.filter2.columnindex.RowRanges; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; import org.apache.parquet.internal.column.columnindex.OffsetIndex; -import org.apache.parquet.internal.filter2.columnindex.RowRanges; /** * Internal utility class to help at column index based filtering. 
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java index e0b0d76e0e..565cf893cc 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java @@ -85,6 +85,7 @@ import org.apache.parquet.crypto.InternalFileDecryptor; import org.apache.parquet.crypto.ModuleCipherFactory.ModuleType; import org.apache.parquet.crypto.ParquetCryptoRuntimeException; +import org.apache.parquet.filter2.columnindex.RowRanges; import org.apache.parquet.filter2.compat.FilterCompat; import org.apache.parquet.filter2.compat.RowGroupFilter; import org.apache.parquet.format.BlockCipher; @@ -111,7 +112,6 @@ import org.apache.parquet.internal.column.columnindex.OffsetIndex; import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter; import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore; -import org.apache.parquet.internal.filter2.columnindex.RowRanges; import org.apache.parquet.internal.hadoop.metadata.IndexReference; import org.apache.parquet.io.InputFile; import org.apache.parquet.io.ParquetDecodingException; diff --git a/pom.xml b/pom.xml index 9eb31d5ee5..90333144cf 100644 --- a/pom.xml +++ b/pom.xml @@ -604,6 +604,18 @@ org.apache.parquet.avro.AvroReadSupport#AVRO_REQUESTED_PROJECTION org.apache.parquet.avro.AvroReadSupport#AVRO_DATA_SUPPLIER org.apache.parquet.hadoop.ParquetFileReader#PARQUET_READ_PARALLELISM + + org.apache.parquet.internal.filter2.columnindex.RowRanges + org.apache.parquet.internal.filter2.columnindex.RowRanges$Range + org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter + + org.apache.parquet.hadoop.ParquetFileReader#readFilteredRowGroup(int,org.apache.parquet.internal.filter2.columnindex.RowRanges) + 
org.apache.parquet.hadoop.util.CompressionConverter$TransParquetFileReader#readFilteredRowGroup(int,org.apache.parquet.internal.filter2.columnindex.RowRanges) From 5f53c99d0d94782e73ac56e5b016d0b1503a27f9 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 30 Jun 2026 17:19:15 +0200 Subject: [PATCH 4/6] fix compile --- .../apache/parquet/hadoop/TestParquetFileReaderRowRanges.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderRowRanges.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderRowRanges.java index e445caf2bc..72fdd37180 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderRowRanges.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderRowRanges.java @@ -31,10 +31,10 @@ import org.apache.parquet.ParquetReadOptions; import org.apache.parquet.example.data.Group; import org.apache.parquet.example.data.simple.SimpleGroupFactory; +import org.apache.parquet.filter2.columnindex.RowRanges; import org.apache.parquet.hadoop.example.ExampleParquetWriter; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.util.HadoopInputFile; -import org.apache.parquet.internal.filter2.columnindex.RowRanges; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageTypeParser; import org.junit.Before; From fa56ff25b15d4c106dea50210fffaac035db757b Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 1 Jul 2026 14:06:29 +0200 Subject: [PATCH 5/6] GH-3596: Keep deprecated RowRanges shim to preserve readFilteredRowGroup compatibility Recreate org.apache.parquet.internal.filter2.columnindex.RowRanges as a deprecated subclass of the relocated org.apache.parquet.filter2.columnindex.RowRanges so the released ParquetFileReader#readFilteredRowGroup(int, RowRanges) signature keeps linking. 
Add a deprecated readFilteredRowGroup overload taking the old internal type that delegates to the new one, and drop the japicmp exclusions for that method now that it is compatible. Other internal-only surfaces (RowRanges statics, Range, ColumnIndexFilter return types) remain relocated without a bridge. --- .../filter2/columnindex/RowRanges.java | 5 +++- .../filter2/columnindex/RowRanges.java | 29 +++++++++++++++++++ .../parquet/hadoop/ParquetFileReader.java | 16 ++++++++++ pom.xml | 15 ++++------ 4 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/filter2/columnindex/RowRanges.java index 4cb0d3c711..5666e049db 100644 --- a/parquet-column/src/main/java/org/apache/parquet/filter2/columnindex/RowRanges.java +++ b/parquet-column/src/main/java/org/apache/parquet/filter2/columnindex/RowRanges.java @@ -96,7 +96,10 @@ public String toString() { private final List ranges; - private RowRanges() { + // Visible for the deprecated org.apache.parquet.internal.filter2.columnindex.RowRanges shim, + // which subclasses this type so the released ParquetFileReader#readFilteredRowGroup(int, RowRanges) + // signature keeps working. Remove once that shim is dropped (2.0). + protected RowRanges() { this(new ArrayList<>()); } diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java new file mode 100644 index 0000000000..51fe0f1b74 --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.internal.filter2.columnindex; + +/** + * @deprecated moved to {@link org.apache.parquet.filter2.columnindex.RowRanges}. This type is + * retained only so that the released + * {@link org.apache.parquet.hadoop.ParquetFileReader#readFilteredRowGroup(int, RowRanges)} + * signature keeps linking; it will be removed in 2.0. Use + * {@link org.apache.parquet.filter2.columnindex.RowRanges} instead. 
+ */ +@Deprecated +public class RowRanges extends org.apache.parquet.filter2.columnindex.RowRanges {} diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java index 1aa2eb3bdc..9af4b4ac60 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java @@ -1268,6 +1268,22 @@ public ColumnChunkPageReadStore readFilteredRowGroup(int blockIndex, RowRanges r return internalReadFilteredRowGroup(block, rowRanges, getColumnIndexStore(blockIndex)); } + /** + * @param blockIndex the index of the requested block + * @param rowRanges the row ranges to be read from the requested block + * @return the PageReadStore which can provide PageReaders for each column or null if there are no rows in this block + * @throws IOException if an error occurs while reading + * @throws IllegalArgumentException if the {@code blockIndex} is invalid or the {@code rowRanges} is null + * @deprecated use {@link #readFilteredRowGroup(int, RowRanges)} with + * {@link org.apache.parquet.filter2.columnindex.RowRanges} instead. This overload is retained + * for backward compatibility and will be removed in 2.0. + */ + @Deprecated + public ColumnChunkPageReadStore readFilteredRowGroup( + int blockIndex, org.apache.parquet.internal.filter2.columnindex.RowRanges rowRanges) throws IOException { + return readFilteredRowGroup(blockIndex, (RowRanges) rowRanges); + } + /** * Read data in all parts via either vectored IO or serial IO. * @param allParts all parts to be read. 
diff --git a/pom.xml b/pom.xml index d171785c69..c689e6b10b 100644 --- a/pom.xml +++ b/pom.xml @@ -611,18 +611,15 @@ org.apache.parquet.avro.AvroReadSupport#AVRO_REQUESTED_PROJECTION org.apache.parquet.avro.AvroReadSupport#AVRO_DATA_SUPPLIER org.apache.parquet.hadoop.ParquetFileReader#PARQUET_READ_PARALLELISM - + org.apache.parquet.internal.filter2.columnindex.RowRanges org.apache.parquet.internal.filter2.columnindex.RowRanges$Range org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter - - org.apache.parquet.hadoop.ParquetFileReader#readFilteredRowGroup(int,org.apache.parquet.internal.filter2.columnindex.RowRanges) - org.apache.parquet.hadoop.util.CompressionConverter$TransParquetFileReader#readFilteredRowGroup(int,org.apache.parquet.internal.filter2.columnindex.RowRanges) From 8f17e2aa987ce18e3ddb9e78b5e05d4ed72c015b Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 1 Jul 2026 14:24:04 +0200 Subject: [PATCH 6/6] GH-3596: Fix javadoc reference in deprecated RowRanges shim ParquetFileReader lives in the downstream parquet-hadoop module, so a {@link} to it cannot be resolved from parquet-column's javadoc. Use a {@code} reference instead. --- .../parquet/internal/filter2/columnindex/RowRanges.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java index 51fe0f1b74..30709618e4 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java @@ -21,9 +21,8 @@ /** * @deprecated moved to {@link org.apache.parquet.filter2.columnindex.RowRanges}. 
This type is * retained only so that the released - * {@link org.apache.parquet.hadoop.ParquetFileReader#readFilteredRowGroup(int, RowRanges)} - * signature keeps linking; it will be removed in 2.0. Use - * {@link org.apache.parquet.filter2.columnindex.RowRanges} instead. + * {@code ParquetFileReader#readFilteredRowGroup(int, RowRanges)} signature keeps linking; it + * will be removed in 2.0. Use {@link org.apache.parquet.filter2.columnindex.RowRanges} instead. */ @Deprecated public class RowRanges extends org.apache.parquet.filter2.columnindex.RowRanges {}