From 52e61fdd31f262fc40bde20d3d9d9be07efd4889 Mon Sep 17 00:00:00 2001 From: Christoph Pirkl Date: Tue, 12 May 2026 10:50:23 +0200 Subject: [PATCH 1/2] #90: Create user guide --- .github/workflows/javadoc_publish.yml | 88 ++++++++++++++++++++++++++ README.md | 2 + doc/changesets/90-create-user-guide.md | 75 ++++++++++++++++++++++ doc/user_guide.md | 67 ++++++++++++++++++++ 4 files changed, 232 insertions(+) create mode 100644 .github/workflows/javadoc_publish.yml create mode 100644 doc/changesets/90-create-user-guide.md create mode 100644 doc/user_guide.md diff --git a/.github/workflows/javadoc_publish.yml b/.github/workflows/javadoc_publish.yml new file mode 100644 index 0000000..e1547e3 --- /dev/null +++ b/.github/workflows/javadoc_publish.yml @@ -0,0 +1,88 @@ +name: Publish JavaDoc + +on: + push: + branches: [ main ] + workflow_dispatch: + pull_request: + +permissions: + contents: read + +jobs: + build-javadoc: + runs-on: ubuntu-latest + steps: + - name: Checkout the repository + uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Set up Java (11 + 17) + uses: actions/setup-java@v5 + with: + distribution: temurin + java-version: | + 11 + 17 + cache: maven + + # [impl->dsn~publishing-javadoc-api-documentation~1] + - name: Build JavaDoc + run: | + mvn --batch-mode javadoc:javadoc \ + -DossindexSkip=true \ + -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \ + -DtrimStackTrace=false + + # [itest->dsn~publishing-javadoc-api-documentation~1] + - name: Prepare Pages artifact + run: | + mkdir -p ./target/pages/latest + cp -R ./target/reports/apidocs ./target/pages/latest/api + cat > ./target/pages/index.html <<'HTML' + + + + + + Parquet IO Java JavaDoc + + +

Redirecting to latest JavaDoc...

+ + + HTML + + - name: Upload pages artifact + uses: actions/upload-pages-artifact@v5 + with: + path: ./target/pages + + - name: "Report Status" + if: always() + uses: ravsamhq/notify-slack-action@be814b201e233b2dc673608aa46e5447c8ab13f2 + with: + status: ${{ job.status }} + notify_when: "failure" + notification_title: "Parquet IO Java: {workflow} has {status_message}" + message_format: "{emoji} *{workflow}* {status_message} in <{repo_url}|{repo}>" + footer: "Linked to Repo <{repo_url}|{repo}>" + env: + SLACK_WEBHOOK_URL: ${{ secrets.INTEGRATION_TEAM_SLACK_NOTIFICATION_WEBHOOK }} + + deploy-pages: + if: ${{ github.ref == 'refs/heads/main' }} + needs: build-javadoc + runs-on: ubuntu-latest + permissions: + contents: read + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v5 diff --git a/README.md b/README.md index 0cd42c6..d076703 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,8 @@ The PIOJ reads it into a list of person maps: ## Information for Users +- [User Guide](doc/user_guide.md) +- [JavaDoc API](https://exasol.github.io/parquet-io-java/latest/api/index.html) - [Changelog](doc/changes/changelog.md) - [Dependencies](dependencies.md) diff --git a/doc/changesets/90-create-user-guide.md b/doc/changesets/90-create-user-guide.md new file mode 100644 index 0000000..3858b67 --- /dev/null +++ b/doc/changesets/90-create-user-guide.md @@ -0,0 +1,75 @@ +# GH-90 Create user guide + +## Goal + +Provide a developer-focused user guide and published JavaDoc API documentation +for Parquet IO Java, including CI automation for generating and publishing the +API docs. + +Issue: <https://github.com/exasol/parquet-io-java/issues/90> + +## Scope + +In scope: + +* Add and maintain a developer user guide at `doc/user_guide.md`. +* Document Maven Central consumption and basic usage examples in the user guide. +* Build JavaDoc API documentation in CI.
+* Publish JavaDoc API documentation to GitHub Pages. +* Link published JavaDoc API documentation from the user guide. +* Update README links where needed so users can discover the user guide. + +Out of scope: + +* Changing library runtime behavior. +* Changing existing reader APIs. +* Adding third-party dependencies unless design review explicitly approves. + +## Design References + +* [System Requirements](../system_requirements.md) +* [Software Architectural Design](../design.md) +* [Quality Requirements](../design/quality_requirements.md) +* [CI Build Workflow](../../.github/workflows/ci-build.yml) +* [Release Workflow](../../.github/workflows/release.yml) + +## Strategy + +Follow spec-first delivery: update traced requirements and design first, then +implement workflow and documentation updates, and finally verify trace/build +quality gates. + +## Task List + +- [ ] Create and checkout a new Git branch + `feature/90-create-user-guide` + +### Requirements And Design + +- [x] Add user-facing requirement coverage for developer documentation in + `doc/system_requirements.md`. +- [x] Stop and ask user for a review of the system requirements. +- [x] Add/update design items for JavaDoc generation and GitHub Pages + publication in `doc/design.md`. +- [ ] Stop and ask user for a review of the design. + +### Implementation + +- [x] Add `doc/user_guide.md` with Maven Central dependency instructions, + JavaDoc link, and usage examples. +- [x] Add or update GitHub workflow to build and publish JavaDoc API docs to + GitHub Pages. +- [x] Update project documentation links in `README.md` to include the user + guide. + +### Verification + +- [ ] Run OpenFastTrace and keep the trace clean. +- [ ] Run required Maven build checks from quality requirements. +- [ ] Validate the workflow changes are syntactically correct and reference + existing repository paths. + +## Version And Changelog Update + +- [ ] Decide version impact for issue GH-90. 
+- [ ] Add a changelog entry for the next release if required. diff --git a/doc/user_guide.md b/doc/user_guide.md new file mode 100644 index 0000000..e158237 --- /dev/null +++ b/doc/user_guide.md @@ -0,0 +1,67 @@ +# User Guide + +This guide is for developers integrating Parquet IO Java into their applications. + +## Get The Library From Maven Central + +Add Parquet IO Java to your Maven dependencies. + +```xml +<dependency> + <groupId>com.exasol</groupId> + <artifactId>parquet-io-java</artifactId> + <version>$version</version> +</dependency> +``` + +For the newest release, check [Maven Central](https://search.maven.org/artifact/com.exasol/parquet-io-java). + +## API Documentation + +The JavaDoc API documentation is published [here](https://exasol.github.io/parquet-io-java/latest/api/index.html). + +## Basic Usage Examples + +### Read All Rows From A Parquet File + +```java +final Path path = new Path("/data/parquet/part-0000.parquet"); +final Configuration conf = new Configuration(); +try (final ParquetReader<Row> reader = RowParquetReader + .builder(HadoopInputFile.fromPath(path, conf)).build()) { + Row row = reader.read(); + while (row != null) { + final List<Object> values = row.getValues(); + System.out.println(values); + row = reader.read(); + } +} +``` + +### Read A Single Column By Name + +```java +try (final ParquetReader<Row> reader = RowParquetReader + .builder(HadoopInputFile.fromPath(path, conf)).build()) { + Row row = reader.read(); + while (row != null) { + final Object customerId = row.getValue("customer_id"); + System.out.println(customerId); + row = reader.read(); + } +} +``` + +### Read Only Selected Row Groups + +```java +final List<ChunkInterval> intervals = List.of(new ChunkIntervalImpl(0, 2)); +try (final RowParquetChunkReader reader = RowParquetChunkReader + .builder(HadoopInputFile.fromPath(path, conf), intervals).build()) { + Row row = reader.read(); + while (row != null) { + System.out.println(row.getValues()); + row = reader.read(); + } +} +``` From 0e6c771894f79e3c8ef02180b9d3a77b83b1ff70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=A4r?=
Date: Tue, 12 May 2026 11:07:35 +0200 Subject: [PATCH 2/2] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sebastian Bär --- .github/workflows/javadoc_publish.yml | 6 ++---- doc/user_guide.md | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/javadoc_publish.yml b/.github/workflows/javadoc_publish.yml index e1547e3..8bf6427 100644 --- a/.github/workflows/javadoc_publish.yml +++ b/.github/workflows/javadoc_publish.yml @@ -6,11 +6,11 @@ on: workflow_dispatch: pull_request: -permissions: - contents: read jobs: build-javadoc: + permissions: + contents: read runs-on: ubuntu-latest steps: - name: Checkout the repository @@ -27,7 +27,6 @@ jobs: 17 cache: maven - # [impl->dsn~publishing-javadoc-api-documentation~1] - name: Build JavaDoc run: | mvn --batch-mode javadoc:javadoc \ @@ -35,7 +34,6 @@ jobs: -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \ -DtrimStackTrace=false - # [itest->dsn~publishing-javadoc-api-documentation~1] - name: Prepare Pages artifact run: | mkdir -p ./target/pages/latest diff --git a/doc/user_guide.md b/doc/user_guide.md index e158237..6940e30 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -10,7 +10,7 @@ Add Parquet IO Java to your Maven dependencies. com.exasol parquet-io-java - $version + <latest-version> ```