diff --git a/.github/ci-config.yml b/.github/ci-config.yml new file mode 100644 index 000000000..416a96d14 --- /dev/null +++ b/.github/ci-config.yml @@ -0,0 +1,13 @@ +dependencies: | + ecmwf/ecbuild + MathisRosenhauer/libaec@refs/tags/v1.1.3 + ecmwf/eccodes + ecmwf/eckit@develop + ecmwf/metkit + ecmwf/fdb + ecmwf/gribjump +dependency_cmake_options: | + ecmwf/gribjump: "-DENABLE_FDB_BUILD_TOOLS=ON" +dependency_branch: develop +parallelism_factor: 8 +self_build: false \ No newline at end of file diff --git a/.github/ci-hpc-config.yml b/.github/ci-hpc-config.yml new file mode 100644 index 000000000..98a9a857c --- /dev/null +++ b/.github/ci-hpc-config.yml @@ -0,0 +1,21 @@ +build: + python: "3.10" + modules: + - ninja + - aec + dependencies: + - ecmwf/ecbuild@develop + - ecmwf/eccodes@develop + - ecmwf/eckit@develop + - ecmwf/metkit@develop + - ecmwf/fdb@develop + - ecmwf/gribjump@develop + dependency_cmake_options: + - "ecmwf/gribjump: '-DENABLE_FDB_BUILD_TOOLS=ON'" + - "ecmwf/fdb: '-DENABLE_LUSTRE=OFF'" + parallel: 64 + env: + - ECCODES_SAMPLES_PATH=$ECCODES_DIR/share/eccodes/samples + - ECCODES_DEFINITION_PATH=$ECCODES_DIR/share/eccodes/definitions + pytest_cmd: | + python -m pytest -m 'not fdb' tests --cov=./ --cov-report=xml diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 000000000..ae4568c03 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,11 @@ +name: cd + +on: + push: + tags: + - '**' + +jobs: + deploy: + uses: ecmwf-actions/reusable-workflows/.github/workflows/create-package.yml@v2 + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 24a5887c9..81fb4033d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,145 +1,45 @@ name: ci + on: # Trigger the workflow on push to master or develop, except tag creation push: branches: - - 'main' - - 'develop' + - "main" + - "develop" + tags-ignore: + - "**" + # Trigger the workflow on pull request pull_request: ~ + # Trigger the workflow manually workflow_dispatch: ~ + # Trigger after public PR approved for CI pull_request_target: types: [labeled] - release: - types: [created] -jobs: - qa: - name: qa - runs-on: ubuntu-20.04 - steps: - - name: Checkout Repository - uses: actions/checkout@v3 - with: - repository: ${{ inputs.repository }} - ref: ${{ inputs.ref }} - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: ${{ inputs.python_version }} - - - name: Install Python Dependencies - run: | - python -m pip install --upgrade pip - python -m pip install black flake8 isort - - name: Check isort - run: isort --check . - - - name: Check black - run: black --check . - - - name: Check flake8 - run: flake8 . - setup: - name: setup - runs-on: ubuntu-20.04 - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - inputs: ${{ steps.prepare-inputs.outputs.inputs }} - inputs-for-ubuntu: ${{ steps.prepare-inputs.outputs.inputs-for-ubuntu }} - steps: - - name: Set Matrix - id: set-matrix - shell: bash -eux {0} - run: | - MATRIX=$(cat << 'EOS' - name: - - gnu-11@ubuntu-22.04 - - clang-14@ubuntu-22.04 - include: - - name: gnu-11@ubuntu-22.04 - os: ubuntu-22.04 - compiler: gnu-11 - compiler_cc: gcc-11 - compiler_cxx: g++-11 - compiler_fc: gfortran-11 - - name: clang-14@ubuntu-22.04 - os: ubuntu-22.04 - compiler: clang-14 - compiler_cc: clang-14 - compiler_cxx: clang++-14 - compiler_fc: gfortran-11 - # Xcode compiler requires empty environment variables, so we pass null (~) here - EOS - ) - SKIP_MATRIX_JOBS=$(cat << 'EOS' - ${{ inputs.skip_matrix_jobs }} - EOS - ) - SELECT_NAME_COND="1 != 1" - SELECT_INCLUDE_COND="1 != 1" - for skip_job in $SKIP_MATRIX_JOBS; do SELECT_NAME_COND="$SELECT_NAME_COND or . == \"$skip_job\""; SELECT_INCLUDE_COND="$SELECT_INCLUDE_COND or .name == \"$skip_job\""; done - echo matrix=$(echo "$MATRIX" | yq eval "del(.name[] | select($SELECT_NAME_COND)) | del(.include[] | select($SELECT_INCLUDE_COND))" --output-format json --indent 0 -) >> $GITHUB_OUTPUT - - name: Prepare build-package Inputs - id: prepare-inputs - shell: bash -eux {0} - run: | - echo inputs=$(echo "${{ inputs.build_package_inputs || '{}' }}" | yq eval '.' --output-format json --indent 0 -) >> $GITHUB_OUTPUT - echo inputs-for-ubuntu=$(echo "${{ inputs.build_package_inputs || '{}' }}" | yq eval '. * {"os":"ubuntu-20.04","compiler":"gnu-10","compiler_cc":"gcc-10","compiler_cxx":"g++-10","compiler_fc":"gfortran-10"}' --output-format json --indent 0 -) >> $GITHUB_OUTPUT - test: - name: test - needs: - - qa - - setup - strategy: - fail-fast: false - matrix: ${{ fromJson(needs.setup.outputs.matrix) }} - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - name: Install eccodes and Dependencies - id: install-dependencies - uses: ecmwf-actions/build-package@v2 - with: - self_build: false - dependencies: | - ecmwf/ecbuild@develop - MathisRosenhauer/libaec@master - ecmwf/eccodes@develop - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: ${{ inputs.python_version }} - - - name: Install Python Dependencies - run: | - python -m pip install --upgrade pip - python -m pip install pytest pytest-cov - python -m pip install -r requirements.txt - python -m pip install -r ./tests/requirements_test.txt - - - name: Verify Source Distribution - shell: bash -eux {0} - run: | - python setup.py sdist - python -m pip install dist/* - - name: Run Tests with Repository Code - env: - LD_LIBRARY_PATH: ${{ steps.install-dependencies.outputs.lib_path }} - shell: bash -eux {0} - run: | - DYLD_LIBRARY_PATH=${{ env.LD_LIBRARY_PATH }} python -m pytest -m "not fdb" tests --cov=./ --cov-report=xml - python -m coverage report +jobs: + # Run CI including downstream packages on self-hosted runners + downstream-ci: + name: downstream-ci + if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} + uses: ecmwf/downstream-ci/.github/workflows/downstream-ci.yml@main + with: + polytope: ecmwf/polytope@${{ github.event.pull_request.head.sha || github.sha }} + codecov_upload: true + python_qa: true + secrets: inherit + + downstream-ci-hpc: + name: downstream-ci-hpc + if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} + uses: ecmwf/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main + with: + polytope: ecmwf/polytope@${{ github.event.pull_request.head.sha || github.sha }} + secrets: inherit - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - with: - files: coverage.xml deploy: - needs: test if: ${{ github.event_name == 'release' }} name: Upload to Pypi runs-on: ubuntu-latest @@ -159,4 +59,4 @@ jobs: TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} run: | python -m build - twine upload dist/* \ No newline at end of file + twine upload dist/* diff --git a/.github/workflows/label-public-pr.yml b/.github/workflows/label-public-pr.yml new file mode 100644 index 000000000..52f3b87c5 --- /dev/null +++ b/.github/workflows/label-public-pr.yml @@ -0,0 +1,10 @@ +# Manage labels of pull requests that originate from forks +name: label-public-pr + +on: + pull_request_target: + types: [opened, synchronize] + +jobs: + label: + uses: ecmwf-actions/reusable-workflows/.github/workflows/label-pr.yml@v2 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3bf9b6198..dc2893e55 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,8 @@ polytope_feature/_version.py **.lock **/target **/*venv*/ -test_w_qubed \ No newline at end of file +test_w_qubed +performance_plots_qubed +_version.py +*.DS_Store +*.nc \ No newline at end of file diff --git a/codecov.yml b/codecov.yml index f911473e5..41335c532 100644 --- a/codecov.yml +++ b/codecov.yml @@ -6,4 +6,10 @@ ignore: - "**/test*" - "**/pb2*" - "**/fdb*" # ignore fdb backend which we can't test in CI yet - - "polytope/datacube/backends/fdb.py" # ignore fdb backend which we can't test in CI yet \ No newline at end of file + - "polytope/datacube/backends/fdb.py" # ignore fdb backend which we can't test in CI yet + +coverage: + status: + patch: + default: + enabled: false # disables patch check \ No newline at end of file diff --git a/docs/Service/Examples/OpenData/od_boundingbox.ipynb b/docs/Service/Examples/OpenData/od_boundingbox_example.ipynb similarity index 100% rename from docs/Service/Examples/OpenData/od_boundingbox.ipynb rename to docs/Service/Examples/OpenData/od_boundingbox_example.ipynb diff --git a/docs/Service/Examples/examples.md b/docs/Service/Examples/examples.md index 69d427998..2179a4103 100644 --- a/docs/Service/Examples/examples.md +++ b/docs/Service/Examples/examples.md @@ -12,11 +12,11 @@ ## Open Data * Timeseries -* Timeseries -* Timeseries -* Timeseries -* Timeseries -* Timeseries +* Polygon +* Vertical Profile +* Bounding Box +* Trajectory +* Country Cut-Out For examples of Polytope Feature Extraction on Destination Earth Digital Twin Data please visit the following Github Repo: https://github.com/destination-earth-digital-twins/polytope-examples diff --git a/polytope_feature/datacube/backends/datacube.py b/polytope_feature/datacube/backends/datacube.py index 1d9d94de6..8dc7a8ed2 100644 --- a/polytope_feature/datacube/backends/datacube.py +++ b/polytope_feature/datacube/backends/datacube.py @@ -145,7 +145,6 @@ def get_indices(self, path: DatacubePath, axis, lower, upper, method=None): e.g. returns integer discrete points between two floats """ path = self.fit_path(path) - print(path) indexes = axis.find_indexes(path, self) idx_between = axis.find_indices_between(indexes, lower, upper, self, method) @@ -170,7 +169,15 @@ def remap_path(self, path: DatacubePath): return path @staticmethod - def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alternative_axes=[], datacube_axes={}, context=None): + def create( + datacube, + config={}, + axis_options={}, + compressed_axes_options=[], + alternative_axes=[], + datacube_axes={}, + context=None, + ): # TODO: get the configs as None for pre-determined value and change them to empty dictionary inside the function if type(datacube).__name__ == "DataArray": from .xarray import XArrayDatacube @@ -185,13 +192,15 @@ def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alt ) return fdbdatacube if type(datacube).__name__ == "Qube": - from .qubed import QubedDatacube from ..datacube_axis import _str_to_axis + from .qubed import QubedDatacube + actual_datacube_axes = {} for key, value in datacube_axes.items(): actual_datacube_axes[key] = _str_to_axis[value] - qubed_datacube = QubedDatacube(datacube, actual_datacube_axes, - config, axis_options, compressed_axes_options, alternative_axes, context) + qubed_datacube = QubedDatacube( + datacube, actual_datacube_axes, config, axis_options, compressed_axes_options, alternative_axes, context + ) return qubed_datacube def check_branching_axes(self, request): diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index b471569cb..67aa28dae 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -2,19 +2,26 @@ import operator from copy import deepcopy from itertools import product -from ...utility.exceptions import BadGridError, BadRequestError, GribJumpNoIndexError -from ...utility.geometry import nearest_pt + +import numpy as np import pygribjump as pygj from qubed.value_types import QEnum -import numpy as np +from ...utility.exceptions import BadGridError, GribJumpNoIndexError +from ...utility.geometry import nearest_pt from .datacube import Datacube, TensorIndexTree class QubedDatacube(Datacube): - def __init__( - self, q, datacube_axes, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None + self, + q, + datacube_axes, + config=None, + axis_options=None, + compressed_axes_options=[], + alternative_axes=[], + context=None, ): if config is None: config = {} @@ -134,9 +141,26 @@ def get_indices(self, path, path_node, axis, lower, upper, method=None): logging.debug(f"For axis {axis.name} between {lower} and {upper}, found indices {idx_between}") - return idx_between + if path_node: + indexes = [indexes.index(item) for item in idx_between] + else: + indexes = None + + return (idx_between, indexes) def get(self, requests, context=None): + """ + We have a compressed tree of requests, which we need to decompress completely with its metadata indexes. + BUT the last two axes, we would like to "ignore" in the decompression and instead, we create grid index ranges from them. + WHILE we decompress, we need to keep some kind of map from decompressed request + grid index ranges to corresponding tree node. + This mapping will map potentially several decompressed request + grid index ranges tuples to the same tree nodes. + + ADDED DIFFICULTY: the grid index ranges MUST be ordered (which is not guaranteed from the tree) so we need to sort them, while also sorting the tree nodes so that they match up. + + UNTIL NOW, we had a map of compressed request + grid index ranges tuples to tree nodes. + We could just add a map of compressed request -> list of decompressed request + associated metadata + """ + if context is None: context = {} if len(requests.children) == 0: @@ -149,6 +173,9 @@ def get(self, requests, context=None): complete_list_complete_uncompressed_requests = [] complete_fdb_decoding_info = [] for j, compressed_request in enumerate(fdb_requests): + compressed_metadata = compressed_request[2] + + # TODO: get uncompressed metadata for each leaf uncompressed_request = {} # Need to determine the possible decompressed requests @@ -159,13 +186,36 @@ def get(self, requests, context=None): interm_branch_tuple_values.append(compressed_request[0][key]) request_combis = product(*interm_branch_tuple_values) + index_combis_raw = list(product(*[range(len(lst)) for lst in interm_branch_tuple_values])) + index_combis = [(0, *comb) for comb in index_combis_raw] + # Need to extract the possible requests and add them to the right nodes - for combi in request_combis: - uncompressed_request = {} - for i, key in enumerate(compressed_request[0].keys()): - uncompressed_request[key] = combi[i] - complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash) - complete_list_complete_uncompressed_requests.append(complete_uncompressed_request) + + def find_metadata(metadata_idx): + metadata = {} + for k, vs in compressed_metadata.items(): + metadata_depth = len(vs.shape) + relevant_metadata_dxs = metadata_idx[:metadata_depth] + metadata[k] = vs[relevant_metadata_dxs] + return metadata + + for i, combi in enumerate(request_combis): + metadata_idxs = index_combis[i] + actual_metadata = find_metadata(metadata_idxs) + + def flatten_metadata(value): + return value[0] if isinstance(value, np.ndarray) else value + + path = flatten_metadata(actual_metadata["path"]) + scheme = flatten_metadata(actual_metadata["scheme"]) + offset = flatten_metadata(actual_metadata["offset"]) + host = flatten_metadata(actual_metadata["host"]) + port = flatten_metadata(actual_metadata["port"]) + + gj_extraction_request = pygj.PathExtractionRequest( + path, scheme, offset, host, port, compressed_request[1], self.grid_md5_hash) + + complete_list_complete_uncompressed_requests.append(gj_extraction_request) complete_fdb_decoding_info.append(fdb_requests_decoding_info[j]) if logging.root.level <= logging.DEBUG: @@ -173,7 +223,7 @@ def get(self, requests, context=None): logging.debug("The requests we give GribJump are: %s", printed_list_to_gj) logging.info("Requests given to GribJump extract for %s", context) try: - output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) + output_values = self.gj.extract_from_paths(complete_list_complete_uncompressed_requests, context) except Exception as e: if "BadValue: Grid hash mismatch" in str(e): logging.info("Error is: %s", e) @@ -196,10 +246,15 @@ def get_fdb_requests( fdb_requests=[], fdb_requests_decoding_info=[], leaf_path=None, + leaf_metadata=None, ): + # TODO: collect leaf metadata from qube here too if leaf_path is None: leaf_path = {} + if leaf_metadata is None: + leaf_metadata = {} + # First when request node is root, go to its children if requests.key == "root": logging.debug("Looking for data for the tree") @@ -217,7 +272,7 @@ def get_fdb_requests( if requests.key == "time": new_vals = [] for val in key_value_path[requests.key]: - new_vals.append(val[7:9]+val[10:12]) + new_vals.append(val[7:9] + val[10:12]) key_value_path[requests.key] = new_vals if requests.key == "date": new_vals = [] @@ -225,6 +280,8 @@ def get_fdb_requests( new_vals.append(val[:4] + val[5:7] + val[8:10]) key_value_path[requests.key] = new_vals leaf_path.update(key_value_path) + # TODO: in the leaf metadata, try to instead store mapping leaf_path -> list(individual metadata dicts for each uncompressed value of tree) + leaf_metadata.update(requests.metadata) if len(requests.children[0].children[0].children) == 0: # find the fdb_requests and associated nodes to which to add results (path, current_start_idxs, fdb_node_ranges, lat_length) = self.get_2nd_last_values(requests, leaf_path) @@ -233,13 +290,14 @@ def get_fdb_requests( sorted_request_ranges, fdb_node_ranges, ) = self.sort_fdb_request_ranges(current_start_idxs, lat_length, fdb_node_ranges) - fdb_requests.append((path, sorted_request_ranges)) + fdb_requests.append((path, sorted_request_ranges, deepcopy(leaf_metadata))) + # TODO: did we need to deepcopy the leaf metadata?? fdb_requests_decoding_info.append((original_indices, fdb_node_ranges)) # Otherwise remap the path for this key and iterate again over children else: for c in requests.children: - self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path) + self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path, leaf_metadata) def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs): # seen_indices = set() @@ -337,8 +395,6 @@ def get_2nd_last_values(self, requests, leaf_path=None): current_start_idx = deepcopy(current_start_idxs[i]) fdb_range_nodes = deepcopy(fdb_node_ranges[i]) key_value_path = {lat_child.key: list(lat_child.values)} - # print("WHAT ARE THE DATACUBE AXES NOW??") - # print(self._axes.keys()) ax = self._axes[lat_child.key] (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path @@ -379,13 +435,13 @@ def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): if len(request_output_values.values) == 0: # If we are here, no data was found for this path in the fdb none_array = [None] * len(n.values) - if n.data.metadata.get("result", None) is None: - n.data.metadata["result"] = [] - n.data.metadata["result"].extend(none_array) + if n.metadata.get("result", None) is None: + n.metadata["result"] = [] + n.metadata["result"].extend(none_array) else: - if n.data.metadata.get("result", None) is None: - n.data.metadata["result"] = [] - n.data.metadata["result"].extend(request_output_values.values[i]) + if n.metadata.get("result", None) is None: + n.metadata["result"] = [] + n.metadata["result"].extend(request_output_values.values[i]) def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( @@ -403,8 +459,11 @@ def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1]) original_indices_idx, interm_start_idx = zip(*sorted_list) for interm_fdb_nodes_obj in interm_fdb_nodes[j]: - interm_fdb_nodes_obj.data.values = QEnum(tuple([list(interm_fdb_nodes_obj.values)[k] - for k in original_indices_idx])) + # interm_fdb_nodes_obj.data.values = QEnum(tuple([list(interm_fdb_nodes_obj.values)[k] + # for k in original_indices_idx])) + interm_fdb_nodes_obj.values = QEnum( + tuple([list(interm_fdb_nodes_obj.values)[k] for k in original_indices_idx]) + ) if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx): current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1) interm_request_ranges.append(current_request_ranges) diff --git a/polytope_feature/datacube/backends/qubed_fdb.py b/polytope_feature/datacube/backends/qubed_fdb.py index dcd8b31fa..c3b339b29 100644 --- a/polytope_feature/datacube/backends/qubed_fdb.py +++ b/polytope_feature/datacube/backends/qubed_fdb.py @@ -3,11 +3,12 @@ from copy import deepcopy from itertools import product +import requests +from qubed import Qube + from ...utility.exceptions import BadGridError, BadRequestError from ...utility.geometry import nearest_pt from .datacube import Datacube, TensorIndexTree -from qubed import Qube -import requests class FDBDatacube(Datacube): @@ -30,8 +31,11 @@ def __init__( # Find values in the level 3 FDB datacube self.gj = gj - self.fdb_tree = Qube.from_json(requests.get( - "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) + self.fdb_tree = Qube.from_json( + requests.get( + "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json" + ).json() + ) if len(alternative_axes) == 0: logging.info("Find GribJump axes for %s", context) diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index 06fbe1953..b2032a5b6 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -355,9 +355,10 @@ def serialize(self, value): float: FloatDatacubeAxis(), } -_str_to_axis = {"FloatDatacubeAxis": FloatDatacubeAxis(), - "IntDatacubeAxis": IntDatacubeAxis(), - "UnsliceableDatacubeAxis": UnsliceableDatacubeAxis(), - "PandasTimedeltaDatacubeAxis": PandasTimedeltaDatacubeAxis(), - "PandasTimestampDatacubeAxis": PandasTimestampDatacubeAxis(), - } +_str_to_axis = { + "FloatDatacubeAxis": FloatDatacubeAxis(), + "IntDatacubeAxis": IntDatacubeAxis(), + "UnsliceableDatacubeAxis": UnsliceableDatacubeAxis(), + "PandasTimedeltaDatacubeAxis": PandasTimedeltaDatacubeAxis(), + "PandasTimestampDatacubeAxis": PandasTimestampDatacubeAxis(), +} diff --git a/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py b/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py index e06c8fd38..80d971188 100644 --- a/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py @@ -105,9 +105,6 @@ def unmap_path_key(self, key_value_path, leaf_path, unwanted_path, axis): unwanted_path[axis.name] = unwanted_val if axis.name == self._mapped_axes()[1]: first_val = unwanted_path[self._mapped_axes()[0]] - # unmapped_idx = [self.unmap(first_val, (val,)) for val in value] - # print("AND HERE??") - # print(values) unmapped_idx = self.unmap(first_val, values) leaf_path.pop(self._mapped_axes()[0], None) key_value_path.pop(axis.name) diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py index ba9a7b339..e69de29bb 100644 --- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py @@ -1,5 +0,0 @@ -from .healpix import * -from .local_regular import * -from .octahedral import * -from .reduced_ll import * -from .regular import * diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py index fe695469a..54335151e 100644 --- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py @@ -1,6 +1,8 @@ import bisect import math +import numpy as np + from ..datacube_mappers import DatacubeMapper @@ -12,6 +14,7 @@ def __init__(self, base_axis, mapped_axes, resolution, md5_hash=None, local_area self._resolution = resolution self._axis_reversed = {mapped_axes[0]: True, mapped_axes[1]: False} self._first_axis_vals = self.first_axis_vals() + self._first_axis_vals_np_rounded = -np.round(np.array(self._first_axis_vals), decimals=8) self.compressed_grid_axes = [self._mapped_axes[1]] self.Nside = self._resolution self.k = int(math.log2(self.Nside)) @@ -76,12 +79,10 @@ def HEALPix_nj(self, i): def HEALPix_longitudes(self, i): Nj = self.HEALPix_nj(i) step = 360.0 / Nj - start = ( - step / 2.0 if i < self._resolution or 3 * self._resolution - 1 < i or (i + self._resolution) % 2 else 0.0 + start = np.where( + (i < self._resolution) | (3 * self._resolution - 1 < i) | ((i + self._resolution) % 2 == 1), step / 2.0, 0.0 ) - - longitudes = [start + n * step for n in range(Nj)] - + longitudes = start + np.arange(Nj) * step return longitudes def map_second_axis(self, first_val, lower, upper): @@ -90,138 +91,124 @@ def map_second_axis(self, first_val, lower, upper): return return_vals def axes_idx_to_healpix_idx(self, first_idx, second_idx): - idx = 0 - for i in range(self._resolution - 1): - if i != first_idx: - idx += 4 * (i + 1) - else: - idx += second_idx - return idx - for i in range(self._resolution - 1, 3 * self._resolution): - if i != first_idx: - idx += 4 * self._resolution - else: - idx += second_idx - return idx - for i in range(3 * self._resolution, 4 * self._resolution - 1): - if i != first_idx: - idx += 4 * (4 * self._resolution - 1 - i + 1) - else: - idx += second_idx - return idx - - def find_second_idx(self, first_val, second_val): - tol = 1e-10 - second_axis_vals = self.second_axis_vals(first_val) - second_idx = bisect.bisect_left(second_axis_vals, second_val - tol) - return second_idx - - def unmap_first_val_to_start_line_idx(self, first_val): - tol = 1e-8 - first_val = [i for i in self._first_axis_vals if first_val - tol <= i <= first_val + tol][0] - first_idx = self._first_axis_vals.index(first_val) - idx = 0 - for i in range(self._resolution - 1): - if i != first_idx: - idx += 4 * (i + 1) - else: - return idx - for i in range(self._resolution - 1, 3 * self._resolution): - if i != first_idx: - idx += 4 * self._resolution - else: - return idx - for i in range(3 * self._resolution, 4 * self._resolution - 1): - if i != first_idx: - idx += 4 * (4 * self._resolution - 1 - i + 1) - else: - return idx - - def unmap(self, first_val, second_vals, unmapped_idx=None): - tol = 1e-8 - first_value = [i for i in self._first_axis_vals if first_val[0] - tol <= i <= first_val[0] + tol][0] - first_idx = self._first_axis_vals.index(first_value) - healpix_idxs = [] - for second_val in second_vals: - second_val = [i for i in self.second_axis_vals(first_val) if second_val - tol <= i <= second_val + tol][0] - second_idx = self.second_axis_vals(first_val).index(second_val) - healpix_index = self.axes_idx_to_healpix_idx(first_idx, second_idx) - # TODO: here do conversion of ring to nested healpix representation before returning - healpix_index = self.ring_to_nested(healpix_index) - healpix_idxs.append(healpix_index) - return healpix_idxs + res = self._resolution + sum1 = 2 * (res - 1) * res + sum2 = 2 * (((res - 1) * res) - ((4 * res - 1 - first_idx) * (4 * res - first_idx))) + + if first_idx < res - 1: + return (2 * first_idx * (first_idx + 1)) + second_idx + elif first_idx < 3 * res: + return sum1 + (first_idx - (res - 1)) * (4 * res) + second_idx + else: + return sum1 + (2 * res + 1) * (4 * res) + sum2 + second_idx + + def unmap(self, first_val, second_vals): + # Convert to NumPy array for fast computation + idx = np.searchsorted(self._first_axis_vals_np_rounded, -np.round(first_val[0], decimals=8)) + if idx >= len(self._first_axis_vals_np_rounded): + return None + second_axis_vals = np.round(np.array(self.second_axis_vals_from_idx(idx)), decimals=8) + second_vals = np.round(np.array(second_vals), decimals=8) + second_idxs = np.searchsorted(second_axis_vals, second_vals) + valid_mask = second_idxs < len(second_axis_vals) + if not np.all(valid_mask): + return None + healpix_idxs = [self.axes_idx_to_healpix_idx(idx, sec_idx) for sec_idx in second_idxs] + return self.ring_to_nested(np.asarray(healpix_idxs)).tolist() def div_03(self, a, b): - t = 1 if a >= (b << 1) else 0 + """Vectorized version of div_03""" + t = np.where(a >= (b << 1), 1, 0) a -= t * (b << 1) - return (t << 1) + (1 if a >= b else 0) + return (t << 1) + np.where(a >= b, 1, 0) def pll(self, f): - pll_values = [1, 3, 5, 7, 0, 2, 4, 6, 1, 3, 5, 7] + """Vectorized lookup for PLL values""" + pll_values = np.array([1, 3, 5, 7, 0, 2, 4, 6, 1, 3, 5, 7]) return pll_values[f] def to_nest(self, f, ring, Nring, phi, shift): - r = int(((2 + (f >> 2)) << self.k) - ring - 1) - p = int(2 * phi - self.pll(f) * Nring - shift - 1) - if p >= 2 * self.Nside: - p -= 8 * self.Nside - i = int((r + p)) >> 1 - j = int((r - p)) >> 1 + """Vectorized to_nest conversion""" + r = ((2 + (f >> 2)) << self.k) - ring - 1 + p = 2 * phi - self.pll(f) * Nring - shift - 1 + p = np.where(p >= 2 * self.Nside, p - 8 * self.Nside, p) + i = (r + p) >> 1 + j = (r - p) >> 1 return self.fij_to_nest(f, i, j, self.k) def fij_to_nest(self, f, i, j, k): - return (f << (2 * k)) + self.nest_encode_bits(i) + (self.nest_encode_bits(j) << 1) + """Vectorized nest encoding""" + return ( + # (f.astype(np.uint64) << np.uint64(2 * k)) + (f.astype(object) << (2 * k)) + + self.nest_encode_bits(i) + + (self.nest_encode_bits(j).astype(np.uint64) << np.uint64(1)) + ) def nest_encode_bits(self, i): - __masks = [ - 0x00000000FFFFFFFF, - 0x0000FFFF0000FFFF, - 0x00FF00FF00FF00FF, - 0x0F0F0F0F0F0F0F0F, - 0x3333333333333333, - 0x5555555555555555, - ] - i = int(i) - b = i & __masks[0] - b = (b ^ (b << 16)) & __masks[1] - b = (b ^ (b << 8)) & __masks[2] - b = (b ^ (b << 4)) & __masks[3] - b = (b ^ (b << 2)) & __masks[4] - b = (b ^ (b << 1)) & __masks[5] - return b - - def ring_to_nested(self, idx): - if idx < self.Ncap: - # North polar cap - Nring = (1 + self.int_sqrt(2 * idx + 1)) >> 1 - phi = 1 + idx - 2 * Nring * (Nring - 1) - f = self.div_03(phi - 1, Nring) - return self.to_nest(f, Nring, Nring, phi, 0) - - if self.Npix - self.Ncap <= idx: - # South polar cap - Nring = (1 + self.int_sqrt(2 * self.Npix - 2 * idx - 1)) >> 1 - phi = 1 + idx + 2 * Nring * (Nring - 1) + 4 * Nring - self.Npix - ring = 4 * self.Nside - Nring # (from South pole) - f = self.div_03(phi - 1, Nring) + 8 - return self.to_nest(f, ring, Nring, phi, 0) - else: - # Equatorial belt - ip = idx - self.Ncap - tmp = ip >> (self.k + 2) - - phi = ip - tmp * 4 * self.Nside + 1 - ring = tmp + self.Nside + """Vectorized bit manipulation for HEALPix indexing""" + __masks = np.array( + [ + 0x00000000FFFFFFFF, + 0x0000FFFF0000FFFF, + 0x00FF00FF00FF00FF, + 0x0F0F0F0F0F0F0F0F, + 0x3333333333333333, + 0x5555555555555555, + ], + dtype=np.uint64, + ) - ifm = 1 + ((phi - 1 - ((1 + tmp) >> 1)) >> self.k) - ifp = 1 + ((phi - 1 - ((1 - tmp + 2 * self.Nside) >> 1)) >> self.k) - f = (ifp | 4) if ifp == ifm else (ifp if ifp < ifm else (ifm + 8)) + b = i.astype(np.uint64) & __masks[0] + b = (b ^ (b << np.uint64(16))) & __masks[1] + b = (b ^ (b << np.uint64(8))) & __masks[2] + b = (b ^ (b << np.uint64(4))) & __masks[3] + b = (b ^ (b << np.uint64(2))) & __masks[4] + b = (b ^ (b << np.uint64(1))) & __masks[5] + return b - return self.to_nest(f, ring, self.Nside, phi, ring & 1) + def int_sqrt(self, x): + """Efficient integer square root for arrays""" + return np.sqrt(x + 0.5).astype(int) - def int_sqrt(self, i): - return int(math.sqrt(i + 0.5)) + def ring_to_nested(self, idx): + """Vectorized ring_to_nested conversion""" + # idx = np.asarray(idx) # Ensure input is an array + + north_mask = idx < self.Ncap + south_mask = self.Npix - self.Ncap <= idx + + # North polar cap + Nring_north = (1 + self.int_sqrt(2 * idx + 1)) >> 1 + phi_north = 1 + idx - 2 * Nring_north * (Nring_north - 1) + f_north = self.div_03(phi_north - 1, Nring_north) + nested_north = self.to_nest(f_north, Nring_north, Nring_north, phi_north, 0) + + # South polar cap + Nring_south = (1 + self.int_sqrt(2 * self.Npix - 2 * idx - 1)) >> 1 + phi_south = 1 + idx + 2 * Nring_south * (Nring_south - 1) + 4 * Nring_south - self.Npix + ring_south = 4 * self.Nside - Nring_south + f_south = self.div_03(phi_south - 1, Nring_south) + 8 + nested_south = self.to_nest(f_south, ring_south, Nring_south, phi_south, 0) + + # Equatorial belt + ip = idx - self.Ncap + tmp = ip >> (self.k + 2) + + phi_equatorial = ip - tmp * 4 * self.Nside + 1 + ring_equatorial = tmp + self.Nside + + ifm = 1 + ((phi_equatorial - 1 - ((1 + tmp) >> 1)) >> self.k) + ifp = 1 + ((phi_equatorial - 1 - ((1 - tmp + 2 * self.Nside) >> 1)) >> self.k) + f_equatorial = np.where(ifp == ifm, ifp | 4, np.where(ifp < ifm, ifp, ifm + 8)) + + nested_equatorial = self.to_nest(f_equatorial, ring_equatorial, self.Nside, phi_equatorial, ring_equatorial & 1) + nested_result = np.empty_like(idx) # Preallocate array for performance + nested_result[north_mask] = nested_north[north_mask] + nested_result[south_mask] = nested_south[south_mask] + nested_result[~(north_mask | south_mask)] = nested_equatorial[~(north_mask | south_mask)] + return nested_result # md5 grid hash in form {resolution : hash} diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py index 5319a4ade..18619dc06 100644 --- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py @@ -5128,4 +5128,4 @@ def unmap(self, first_val, second_vals): # md5 grid hash in form {resolution : hash} -_md5_hash = {3601: "386742a2dd1201b67f2d19ed421353ea"} +_md5_hash = {3601: "225e56fb2fdee272ca226dc265d08a0a"} diff --git a/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py b/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py index 4d3232910..7a4da9bc4 100644 --- a/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py +++ b/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py @@ -1,6 +1,7 @@ +import numpy as np + from ....utility.list_tools import bisect_left_cmp, bisect_right_cmp from ..datacube_transformations import DatacubeAxisTransformation -import numpy as np class DatacubeAxisReverse(DatacubeAxisTransformation): diff --git a/polytope_feature/engine/engine.py b/polytope_feature/engine/engine.py index 653a6a2b8..ad8d84b09 100644 --- a/polytope_feature/engine/engine.py +++ b/polytope_feature/engine/engine.py @@ -1,12 +1,10 @@ +import math from typing import List from ..datacube.backends.datacube import Datacube -from ..datacube.tensor_index_tree import TensorIndexTree -from ..shapes import ConvexPolytope, Product - from ..datacube.datacube_axis import UnsliceableDatacubeAxis +from ..shapes import ConvexPolytope, Product from ..utility.list_tools import unique -import math class Engine: @@ -78,8 +76,5 @@ def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): self.find_compressed_axes(datacube, polytopes) self.remove_compressed_axis_in_union(polytopes) self.pre_process_polytopes(datacube, polytopes) - # assert isinstance(datacube, QubedDatacube) tree = self.build_tree(polytopes, datacube) - print("WHAT DOES THE TREE LOOK LIKE??") - print(tree) return tree diff --git a/polytope_feature/engine/hullslicer.py b/polytope_feature/engine/hullslicer.py index e326ee536..cb51cbe7c 100644 --- a/polytope_feature/engine/hullslicer.py +++ b/polytope_feature/engine/hullslicer.py @@ -1,11 +1,7 @@ from copy import copy -from typing import List - -from ..datacube.backends.datacube import Datacube from ..datacube.tensor_index_tree import TensorIndexTree -from ..shapes import ConvexPolytope -from ..utility.combinatorics import group, tensor_product, find_polytope_combinations +from ..utility.combinatorics import find_polytope_combinations, group, tensor_product from ..utility.exceptions import UnsliceableShapeError from .engine import Engine from .slicing_tools import slice diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py index b71f9f2f2..fac8e9611 100644 --- a/polytope_feature/engine/qubed_slicer.py +++ b/polytope_feature/engine/qubed_slicer.py @@ -1,18 +1,18 @@ - +import numpy as np +import pandas as pd from qubed import Qube from qubed.value_types import QEnum -from qubed.set_operations import union -from .slicing_tools import slice -from ..datacube.backends.qubed import QubedDatacube -from .engine import Engine -import pandas as pd + from ..datacube.datacube_axis import UnsliceableDatacubeAxis from ..datacube.transformations.datacube_mappers.datacube_mappers import DatacubeMapper -from ..shapes import ConvexPolytope, Product -from ..utility.combinatorics import group, tensor_product, find_polytopes_on_axis, find_polytope_combinations -from typing import List - -from ..datacube.backends.datacube import Datacube +from ..utility.combinatorics import ( + find_polytope_combinations, + find_polytopes_on_axis, + group, + tensor_product, +) +from .engine import Engine +from .slicing_tools import slice class QubedSlicer(Engine): @@ -27,14 +27,16 @@ def find_datacube_vals(): def find_values_between(self, polytope, ax, node, datacube, lower, upper, path=None): if isinstance(ax, UnsliceableDatacubeAxis): - return [v for v in node.values if lower <= v <= upper] + filtered = [(i, v) for i, v in enumerate(node.values) if lower <= v <= upper] + indices, values = zip(*filtered) if filtered else ([], []) + return values, indices tol = ax.tol lower = ax.from_float(lower - tol) upper = ax.from_float(upper + tol) method = polytope.method - values = datacube.get_indices(path, node, ax, lower, upper, method) - return values + values, indexes = datacube.get_indices(path, node, ax, lower, upper, method) + return values, indexes def get_sliced_polys(self, found_vals, ax, child_name, poly, slice_axis_idx): sliced_polys = [] @@ -48,8 +50,7 @@ def get_sliced_polys(self, found_vals, ax, child_name, poly, slice_axis_idx): def find_children_polytopes(self, polytopes, poly, sliced_polys): child_polytopes = [p for p in polytopes if p != poly] - child_polytopes.extend( - [sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) + child_polytopes.extend([sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) return child_polytopes def find_new_vals(self, found_vals, ax): @@ -63,41 +64,75 @@ def find_new_vals(self, found_vals, ax): new_found_vals.append(found_val) return new_found_vals - def build_branch(self, real_uncompressed_axis, found_vals, sliced_polys, polytopes, poly, child, datacube, datacube_transformations, ax): + def build_branch( + self, + real_uncompressed_axis, + found_vals, + sliced_polys, + polytopes, + poly, + child, + datacube, + datacube_transformations, + ax, + idxs=None, + metadata_idx_stack=None, + ): final_children_and_vals = [] if real_uncompressed_axis: for i, found_val in enumerate(found_vals): - sliced_polys_ = [sliced_polys[i]] + if i < len(sliced_polys): + sliced_polys_ = [sliced_polys[i]] + else: + sliced_polys_ = sliced_polys child_polytopes = self.find_children_polytopes(polytopes, poly, sliced_polys_) - children = self._slice(child, child_polytopes, datacube, datacube_transformations) + if idxs: + current_metadata_idx_stack = metadata_idx_stack + [[idxs[i]]] + children = self._slice(child, child_polytopes, datacube, datacube_transformations, + current_metadata_idx_stack) # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: continue new_found_vals = self.find_new_vals([found_val], ax) - final_children_and_vals.append((children, new_found_vals)) + + if idxs: + request_child_val = (children, new_found_vals, current_metadata_idx_stack) + else: + request_child_val = (children, new_found_vals) + final_children_and_vals.append(request_child_val) else: # if it's compressed, then can add all found values in a single node child_polytopes = self.find_children_polytopes(polytopes, poly, sliced_polys) # create children - children = self._slice(child, child_polytopes, datacube, datacube_transformations) + if idxs: + current_metadata_idx_stack = metadata_idx_stack + [idxs] + children = self._slice(child, child_polytopes, datacube, datacube_transformations, + current_metadata_idx_stack) # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: return None new_found_vals = self.find_new_vals(found_vals, ax) - final_children_and_vals.append((children, new_found_vals)) + if idxs: + request_child_val = (children, new_found_vals, current_metadata_idx_stack) + else: + request_child_val = (children, new_found_vals) + final_children_and_vals.append(request_child_val) + if len(final_children_and_vals) == 0: return None return final_children_and_vals - def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube]: + def _slice(self, q: Qube, polytopes, datacube, datacube_transformations, metadata_idx_stack=None) -> list[Qube]: result = [] + if metadata_idx_stack is None: + metadata_idx_stack = [[0]] + if len(q.children) == 0: # add "fake" axes and their nodes in order -> what about merged axes?? mapper_transformation = None - # for transformation in list(datacube_transformations.values()): for transformation in datacube_transformations: if isinstance(transformation, DatacubeMapper): mapper_transformation = transformation @@ -115,7 +150,7 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list ax = datacube._axes[grid_axes[0]] lower, upper, slice_axis_idx = poly.extents(grid_axes[0]) - found_vals = self.find_values_between(poly, ax, None, datacube, lower, upper) + found_vals, _ = self.find_values_between(poly, ax, None, datacube, lower, upper) if len(found_vals) == 0: continue @@ -123,7 +158,6 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list sliced_polys = self.get_sliced_polys(found_vals, ax, grid_axes[0], poly, slice_axis_idx) # decide if axis should be compressed or not according to polytope # NOTE: actually the first grid axis will never be compressed - # axis_compressed = (grid_axes[0] in self.compressed_axes) # if it's not compressed, need to separate into different nodes to append to the tree for i, found_val in enumerate(found_vals): @@ -136,7 +170,13 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list flattened_path = {grid_axes[0]: (found_val,)} # get second axis children through slicing children = self._slice_second_grid_axis( - grid_axes[1], child_polytopes, datacube, datacube_transformations, second_axis_vals, flattened_path) + grid_axes[1], + child_polytopes, + datacube, + datacube_transformations, + second_axis_vals, + flattened_path, + ) # If this node used to have children but now has none due to filtering, skip it. if not children: continue @@ -148,10 +188,9 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here # and instead keep/cache the value to merge with the node from before?? - qube_node = Qube.make(key=grid_axes[0], - values=QEnum([found_val]), - metadata={}, - children=children) + qube_node = Qube.make_node( + key=grid_axes[0], values=QEnum([found_val]), metadata={}, children=children + ) result.append(qube_node) for i, child in enumerate(q.children): @@ -160,40 +199,66 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list # TODO: here add the axes to datacube backend with transformations for child.key # TODO: update the datacube axis_options before we dynamically change the axes + + # TODO: this is slow... will need to make it faster and only do this when we need... datacube.add_axes_dynamically(child) # here now first change the values in the polytopes on the axis to reflect the axis type - for poly in polytopes_on_axis: ax = datacube._axes[child.key] # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) # find values on child that are within extents - found_vals = self.find_values_between(poly, ax, child, datacube, lower, upper) + found_vals, idxs = self.find_values_between(poly, ax, child, datacube, lower, upper) + + # TODO: find the indexes of the found_vals wrt child.values, to extract the right metadata that we want to keep inside self.build_branch if len(found_vals) == 0: continue sliced_polys = self.get_sliced_polys(found_vals, ax, child.key, poly, slice_axis_idx) # decide if axis should be compressed or not according to polytope - axis_compressed = (child.key in self.compressed_axes) + axis_compressed = child.key in self.compressed_axes real_uncompressed_axis = not axis_compressed and len(found_vals) > 1 final_children_and_vals = self.build_branch( - real_uncompressed_axis, found_vals, sliced_polys, polytopes, poly, child, datacube, datacube_transformations, ax) + real_uncompressed_axis, + found_vals, + sliced_polys, + polytopes, + poly, + child, + datacube, + datacube_transformations, + ax, + idxs, + metadata_idx_stack + ) if final_children_and_vals is None: continue - result.extend([Qube.make( - key=child.key, - values=QEnum(new_found_vals), - metadata=child.metadata, - children=children - ) for (children, new_found_vals) in final_children_and_vals]) + + def find_metadata(metadata_idx): + metadata = {} + for k, vs in child.metadata.items(): + metadata_depth = len(vs.shape) + relevant_metadata_idxs = metadata_idx[:metadata_depth] + ix = np.ix_(*relevant_metadata_idxs) + metadata[k] = vs[ix] + return metadata + + for children, new_found_vals, current_metadata_idxs in final_children_and_vals: + metadata = find_metadata(current_metadata_idxs) + qube_node = Qube.make_node( + key=child.key, values=QEnum(new_found_vals), metadata=metadata, children=children + ) + result.append(qube_node) return result - def _slice_second_grid_axis(self, axis_name, polytopes, datacube, datacube_transformations, second_axis_vals, path) -> list[Qube]: + def _slice_second_grid_axis( + self, axis_name, polytopes, datacube, datacube_transformations, second_axis_vals, path + ) -> list[Qube]: result = [] polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes) @@ -201,7 +266,7 @@ def _slice_second_grid_axis(self, axis_name, polytopes, datacube, datacube_trans ax = datacube._axes[axis_name] lower, upper, slice_axis_idx = poly.extents(axis_name) - found_vals = self.find_values_between(poly, ax, None, datacube, lower, upper, path) + found_vals, _ = self.find_values_between(poly, ax, None, datacube, lower, upper, path) if len(found_vals) == 0: continue @@ -221,18 +286,14 @@ def _slice_second_grid_axis(self, axis_name, polytopes, datacube, datacube_trans # NOTE this was the last axis so we do not have children... - result.extend([Qube.make( - key=axis_name, - values=QEnum(new_found_vals), - metadata={}, - children={} - )]) + result.extend([Qube.make_node(key=axis_name, values=QEnum( + new_found_vals), metadata={"result": []}, children={})]) return result def slice_tree(self, datacube, final_polys): q = datacube.q datacube_transformations = datacube.datacube_transformations - return Qube.root_node(self._slice(q, final_polys, datacube, datacube_transformations)) + return Qube.make_root(self._slice(q, final_polys, datacube, datacube_transformations)) def build_tree(self, polytopes_to_slice, datacube): groups, input_axes = group(polytopes_to_slice) @@ -250,5 +311,5 @@ def build_tree(self, polytopes_to_slice, datacube): final_tree = sub_trees[0] for sub_tree in sub_trees[1:]: - union(final_tree, sub_tree) + final_tree | sub_tree return final_tree diff --git a/polytope_feature/options.py b/polytope_feature/options.py index 584dc9b04..020c7007e 100644 --- a/polytope_feature/options.py +++ b/polytope_feature/options.py @@ -3,6 +3,7 @@ from conflator import ConfigModel from pydantic import ConfigDict + from .datacube.datacube_axis import DatacubeAxis diff --git a/polytope_feature/polytope.py b/polytope_feature/polytope.py index 0f806c8c7..ad2cc561a 100644 --- a/polytope_feature/polytope.py +++ b/polytope_feature/polytope.py @@ -46,8 +46,13 @@ def __init__(self, datacube, engine=None, options=None, context=None): if options is None: options = {} - axis_options, compressed_axes_options, config, alternative_axes, datacube_axes = PolytopeOptions.get_polytope_options( - options) + ( + axis_options, + compressed_axes_options, + config, + alternative_axes, + datacube_axes, + ) = PolytopeOptions.get_polytope_options(options) self.context = context self.datacube = Datacube.create( diff --git a/polytope_feature/version.py b/polytope_feature/version.py index d0ab53db9..a82b376d2 100644 --- a/polytope_feature/version.py +++ b/polytope_feature/version.py @@ -1 +1 @@ -__version__ = "1.0.39" +__version__ = "1.1.1" diff --git a/readme.md b/readme.md index 05eed3c21..2aaaf1fdf 100644 --- a/readme.md +++ b/readme.md @@ -13,6 +13,9 @@ Documentation Status + + Project Maturity +

Concept • @@ -30,8 +33,11 @@ Polytope is designed to extend different datacube backends: Polytope supports datacubes which have branching, non-uniform indexing, and even cyclic axes. If the datacube backend supports byte-addressability and efficient random access (either in-memory or direct from storage), **Polytope** can be used to dramatically decrease overall I/O load. -> [!WARNING] -> This project is BETA and will be experimental for the foreseeable future. Interfaces and functionality are likely to change, and the project itself may be scrapped. DO NOT use this software in any project/software that is operational. +> \[!IMPORTANT\] +> This software is **Incubating** and subject to ECMWF's guidelines on [Software Maturity](https://github.com/ecmwf/codex/raw/refs/heads/main/Project%20Maturity). + + ## Concept diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..1ea51bb91 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,101 @@ +import os +import pathlib +import shutil + +import pyfdb +import pytest +import requests +import yaml + + +class HTTPError(Exception): + def __init__(self, status_code, message): + self.status_code = status_code + self.message = message + super().__init__(f"HTTPError {status_code}: {message}") + + +@pytest.fixture(scope="session") +def shared_temp_data_dir(tmp_path_factory): + """ + Provides a unique temp directory for the entire test session. + """ + temp_dir = tmp_path_factory.mktemp("shared_fdb_data") + return temp_dir + + +@pytest.fixture(scope="session") +def downloaded_data_test_files(shared_temp_data_dir): + """ + Downloads all required GRIB test files once per session. + """ + files_to_download = [ + ("https://sites.ecmwf.int/repository/polytope/test-data/foo.grib", "foo.grib"), + ("https://sites.ecmwf.int/repository/polytope/test-data/local.grib", "local.grib"), + ("https://sites.ecmwf.int/repository/polytope/test-data/aifs_data_from_fdb.grib", "aifs_data_from_fdb.grib"), + ("https://sites.ecmwf.int/repository/polytope/test-data/wind_gust_and_t2m.grib", "wind_gust_and_t2m.grib"), + ("https://sites.ecmwf.int/repository/polytope/test-data/t2m_jan_3_v2.grib", "t2m_jan_3_v2.grib"), + ("https://sites.ecmwf.int/repository/polytope/test-data/wave_spectra.grib", "wave_spectra.grib"), + ("https://sites.ecmwf.int/repository/polytope/test-data/era5-levels-members.grib", "era5-levels-members.grib"), + ] + + downloaded_paths = [] + + for url, filename in files_to_download: + path = shared_temp_data_dir / filename + if not path.exists(): + response = requests.get(url) + if response.status_code != 200: + raise HTTPError(response.status_code, f"Failed to download {url}") + with open(path, "wb") as f: + f.write(response.content) + downloaded_paths.append(path) + + return downloaded_paths + + +@pytest.fixture(scope="session") +def fdb_path(request) -> pathlib.Path: + """ + Provides path to test data at '/tests/fdb_data'. + """ + path = request.config.rootpath / "tests" / "fdb_data" + assert path.exists(), f"Expected path {path} to exist." + return path + + +@pytest.fixture(scope="session", autouse=True) +def fdb_store_operational_setup(fdb_path, tmp_path_factory, downloaded_data_test_files) -> pathlib.Path: + """ + Creates an operational FDB store for tests, loading downloaded test files. + """ + tmp_dir = tmp_path_factory.mktemp("shared_path") + db_store_path = tmp_dir / "db_store" + db_store_path.mkdir(exist_ok=True) + + schema_path = tmp_dir / "schema" + config = dict( + type="local", + engine="toc", + schema=str(schema_path), + spaces=[ + dict( + handler="Default", + roots=[ + {"path": str(db_store_path)}, + ], + ) + ], + ) + + config_path = tmp_dir / "config.yaml" + config_path.write_text(yaml.dump(config)) + shutil.copy(fdb_path / "schema", schema_path) + + os.environ["FDB5_CONFIG_FILE"] = str(config_path) + + fdb = pyfdb.FDB() + for f in downloaded_data_test_files: + fdb.archive(f.read_bytes()) + + return tmp_dir diff --git a/tests/fdb_data/schema b/tests/fdb_data/schema new file mode 100644 index 000000000..9d40771cd --- /dev/null +++ b/tests/fdb_data/schema @@ -0,0 +1,49 @@ +# Default types + +param: Param; +step: Step; +date: Date; +levelist: Double; +grid: Grid; +expver: Expver; +time: Time; +number: Integer; + +######################################################## +# The are the rules matching most of the fields +# oper/dcda +[ class, expver, stream=oper/dcda/scda, date, time, domain? + [ type, levtype + [ step, levelist?, param ]] +] + +# dcwv/scwv/wave +[ class, expver, stream=dcwv/scwv/wave, date, time, domain + [ type, levtype + [ step, param, frequency?, direction? ]]] + +# enfo +[ class, expver, stream=enfo/efov/eefo, date, time, domain + [ type, levtype + [ step, quantile?, number?, levelist?, param ]] +] + +# waef/weov +[ class, expver, stream=waef/weov/weef, date, time, domain + [ type, levtype + [ step, number?, param, frequency?, direction? ]] +] + +# enda +[ class, expver, stream=enda, date, time, domain + + [ type=ef/em/es/ses, levtype + [ step, number?, levelist?, param, channel? ]] + + [ type=ssd + [ step, number, param, ident, instrument, channel ]] + + + [ type, levtype + [ step, number?, levelist?, param ]] +] diff --git a/tests/requirements_test.txt b/tests/requirements_test.txt index e48f1da1a..f3408d8ca 100644 --- a/tests/requirements_test.txt +++ b/tests/requirements_test.txt @@ -4,4 +4,7 @@ cffi eccodes h5netcdf h5py -earthkit-data \ No newline at end of file +earthkit-data +pyfdb +git+https://github.com/ecmwf/gribjump.git@develop +requests \ No newline at end of file diff --git a/tests/test_bad_request_error.py b/tests/test_bad_request_error.py index b1154cf9c..1bff4f308 100644 --- a/tests/test_bad_request_error.py +++ b/tests/test_bad_request_error.py @@ -4,9 +4,6 @@ from polytope_feature.polytope import Polytope from polytope_feature.utility.exceptions import BadRequestError -# import geopandas as gpd -# import matplotlib.pyplot as plt - class TestSlicingFDBDatacube: def setup_method(self, method): @@ -50,7 +47,6 @@ def setup_method(self, method): }, } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_cyclic_nearest.py b/tests/test_cyclic_nearest.py index 86e37c69c..d02f3e4d3 100644 --- a/tests/test_cyclic_nearest.py +++ b/tests/test_cyclic_nearest.py @@ -10,7 +10,7 @@ class TestRegularGrid: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/era5-levels-members.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/era5-levels-members.grib" download_test_data(nexus_url, "era5-levels-members.grib") self.options = { "axis_config": [ @@ -75,7 +75,7 @@ def test_regular_grid(self): request = Request( Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20231102T000000")]), + Select("date", [pd.Timestamp("20240103T0000")]), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), @@ -99,7 +99,7 @@ def test_regular_grid(self): request = Request( Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20231102T000000")]), + Select("date", [pd.Timestamp("20240103T0000")]), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), diff --git a/tests/test_cyclic_snapping.py b/tests/test_cyclic_snapping.py index 599dc9573..984d3fb96 100644 --- a/tests/test_cyclic_snapping.py +++ b/tests/test_cyclic_snapping.py @@ -23,8 +23,6 @@ def setup_method(self, method): self.slicer = HullSlicer() self.API = Polytope(datacube=array, engine=self.slicer, options=options) - # Testing different shapes - def test_cyclic_float_axis_across_seam(self): request = Request(Select("long", [-0.2], method="surrounding")) result = self.API.retrieve(request) diff --git a/tests/test_datacube_axes_init.py b/tests/test_datacube_axes_init.py index d92a08cf3..e7f40efb2 100644 --- a/tests/test_datacube_axes_init.py +++ b/tests/test_datacube_axes_init.py @@ -10,7 +10,7 @@ class TestInitDatacubeAxes: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/foo.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/foo.grib" download_test_data(nexus_url, "foo.grib") ds = data.from_source("file", "./tests/data/foo.grib") diff --git a/tests/test_date_time_unmerged.py b/tests/test_date_time_unmerged.py index f675a354e..5e808b504 100644 --- a/tests/test_date_time_unmerged.py +++ b/tests/test_date_time_unmerged.py @@ -38,7 +38,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj @@ -46,9 +45,7 @@ def test_fdb_datacube(self): request = Request( Select("step", [0]), Select("levtype", ["sfc"]), - # Select("date", [pd.Timestamp("20240118")]), Select("time", [pd.Timedelta("00:00:00")]), - # Span("time", [pd.Timedelta("00:00:00")]), Span("date", pd.Timestamp("20240118"), pd.Timestamp("20240119")), Select("domain", ["g"]), Select("expver", ["0001"]), diff --git a/tests/test_ecmwf_oper_data_fdb.py b/tests/test_ecmwf_oper_data_fdb.py index 2efdfe4a8..9ffc19ee0 100644 --- a/tests/test_ecmwf_oper_data_fdb.py +++ b/tests/test_ecmwf_oper_data_fdb.py @@ -41,7 +41,6 @@ def setup_method(self, method): "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "type": "fc", "stream": "oper"}, } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj @@ -49,7 +48,7 @@ def test_fdb_datacube(self): request = Request( Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20231102T000000")]), + Select("date", [pd.Timestamp("20240103T0000")]), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), @@ -71,7 +70,6 @@ def test_fdb_datacube(self): # assert len(result.leaves) == 3 # assert len(result.leaves[0].result) == 3 - @pytest.mark.fdb def test_fdb_datacube_point(self): import pygribjump as gj diff --git a/tests/test_engine_slicer.py b/tests/test_engine_slicer.py index 3392ccd3a..0c8d1c0d7 100644 --- a/tests/test_engine_slicer.py +++ b/tests/test_engine_slicer.py @@ -46,18 +46,12 @@ def test_triangle(self): result = self.slicer.extract(datacube, triangle) result.pprint() assert len(result.leaves) == 10 - # assert len(result.leaves) == 4 - # total_leaves = 0 - # for leaf in result.leaves: - # total_leaves += len(leaf.values) - # assert total_leaves == 4 + 3 + 2 + 1 def test_reusable(self): datacube = MockDatacube({"x": 100, "y": 100}) polytopes = Polygon(["x", "y"], [[3, 3], [3, 6], [6, 3]]).polytope() result = self.slicer.extract(datacube, polytopes) result.pprint() - # assert len(result.leaves) == 4 assert len(result.leaves) == 10 polytopes = Box(["x", "y"], lower_corner=[3, 3], upper_corner=[6, 6]).polytope() result = self.slicer.extract(datacube, polytopes) diff --git a/tests/test_fdb_datacube.py b/tests/test_fdb_datacube.py index 301ceec30..eb52af1d6 100644 --- a/tests/test_fdb_datacube.py +++ b/tests/test_fdb_datacube.py @@ -45,7 +45,6 @@ def setup_method(self, method): "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"}, } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_healpix_mapper.py b/tests/test_healpix_mapper.py index a0ad6586a..a6d3fc738 100644 --- a/tests/test_healpix_mapper.py +++ b/tests/test_healpix_mapper.py @@ -12,7 +12,7 @@ class TestHealpixGrid: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/healpix.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/healpix.grib" download_test_data(nexus_url, "healpix.grib") ds = data.from_source("file", "./tests/data/healpix.grib") diff --git a/tests/test_healpix_nested_grid.py b/tests/test_healpix_nested_grid.py index 8a929a2fd..8caa68c24 100644 --- a/tests/test_healpix_nested_grid.py +++ b/tests/test_healpix_nested_grid.py @@ -13,7 +13,7 @@ class TestHealpixNestedGrid: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/healpix_nested.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/healpix_nested.grib" download_test_data(nexus_url, "healpix_nested.grib") ds = data.from_source("file", "./tests/data/healpix_nested.grib")[3] diff --git a/tests/test_incomplete_tree_fdb.py b/tests/test_incomplete_tree_fdb.py index 2fbfcaf28..2bc689dcd 100644 --- a/tests/test_incomplete_tree_fdb.py +++ b/tests/test_incomplete_tree_fdb.py @@ -9,7 +9,7 @@ class TestRegularGrid: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/era5-levels-members.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/era5-levels-members.grib" download_test_data(nexus_url, "era5-levels-members.grib") self.options = { "axis_config": [ diff --git a/tests/test_local_grid_cyclic.py b/tests/test_local_grid_cyclic.py index 3f4bfd36c..63aa38216 100644 --- a/tests/test_local_grid_cyclic.py +++ b/tests/test_local_grid_cyclic.py @@ -50,7 +50,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_local_regular_grid.py b/tests/test_local_regular_grid.py index 3f8f2d402..518f12fdf 100644 --- a/tests/test_local_regular_grid.py +++ b/tests/test_local_regular_grid.py @@ -49,7 +49,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_local_swiss_grid.py b/tests/test_local_swiss_grid.py index 34019e54f..50f88c691 100644 --- a/tests/test_local_swiss_grid.py +++ b/tests/test_local_swiss_grid.py @@ -38,7 +38,6 @@ def setup_method(self, method): "compressed_axes_config": ["longitude", "latitude", "levtype", "levelist", "step", "date", "param"], } - # Testing different shapes @pytest.mark.fdb @pytest.mark.skip("Non-accessible data") def test_fdb_datacube(self): diff --git a/tests/test_merge_octahedral_one_axis.py b/tests/test_merge_octahedral_one_axis.py index 4ddf90582..05f8269af 100644 --- a/tests/test_merge_octahedral_one_axis.py +++ b/tests/test_merge_octahedral_one_axis.py @@ -9,7 +9,7 @@ class TestSlicingMultipleTransformationsOneAxis: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/foo.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/foo.grib" download_test_data(nexus_url, "foo.grib") ds = data.from_source("file", "./tests/data/foo.grib") diff --git a/tests/test_multiple_param_fdb.py b/tests/test_multiple_param_fdb.py index 11285af84..774584c93 100644 --- a/tests/test_multiple_param_fdb.py +++ b/tests/test_multiple_param_fdb.py @@ -41,7 +41,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_octahedral_grid.py b/tests/test_octahedral_grid.py index f44ccf337..6c3d7fa16 100644 --- a/tests/test_octahedral_grid.py +++ b/tests/test_octahedral_grid.py @@ -9,7 +9,7 @@ class TestOctahedralGrid: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/foo.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/foo.grib" download_test_data(nexus_url, "foo.grib") ds = data.from_source("file", "./tests/data/foo.grib") diff --git a/tests/test_override_md5_hash_options.py b/tests/test_override_md5_hash_options.py index 0233290af..58696fe0a 100644 --- a/tests/test_override_md5_hash_options.py +++ b/tests/test_override_md5_hash_options.py @@ -48,7 +48,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_point_nearest.py b/tests/test_point_nearest.py index b9fdc4bed..78cf8007c 100644 --- a/tests/test_point_nearest.py +++ b/tests/test_point_nearest.py @@ -42,7 +42,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj @@ -128,15 +127,15 @@ def test_fdb_datacube_true_point_3(self): import pygribjump as gj request = Request( - Select("step", [21]), + Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20231102T000000")]), + Select("date", [pd.Timestamp("20230625T120000")]), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), Select("class", ["od"]), Select("stream", ["oper"]), - Select("type", ["fc"]), + Select("type", ["an"]), Point(["latitude", "longitude"], [[0.035149384216, -0.01]], method="nearest"), ) self.fdbdatacube = gj.GribJump() @@ -157,15 +156,15 @@ def test_fdb_datacube_true_point_5(self): import pygribjump as gj request = Request( - Select("step", [21]), + Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20231102T000000")]), + Select("date", [pd.Timestamp("20230625T120000")]), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), Select("class", ["od"]), Select("stream", ["oper"]), - Select("type", ["fc"]), + Select("type", ["an"]), Point(["latitude", "longitude"], [[0.035149384216, 360 - 0.01]], method="nearest"), ) self.fdbdatacube = gj.GribJump() @@ -186,15 +185,15 @@ def test_fdb_datacube_true_point_4(self): import pygribjump as gj request = Request( - Select("step", [21]), + Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20231102T000000")]), + Select("date", [pd.Timestamp("20230625T120000")]), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), Select("class", ["od"]), Select("stream", ["oper"]), - Select("type", ["fc"]), + Select("type", ["an"]), Point(["latitude", "longitude"], [[0.035149384216, 359.97]], method="nearest"), ) self.fdbdatacube = gj.GribJump() @@ -205,7 +204,6 @@ def test_fdb_datacube_true_point_4(self): options=self.options, ) result = self.API.retrieve(request) - # result.pprint_2() assert len(result.leaves) == 1 assert result.leaves[0].values == (359.929906542056,) assert result.leaves[0].axis.name == "longitude" diff --git a/tests/test_point_shape.py b/tests/test_point_shape.py index 247e4b878..35c803984 100644 --- a/tests/test_point_shape.py +++ b/tests/test_point_shape.py @@ -30,7 +30,6 @@ def test_point(self): assert result.leaves[0].axis.name == "level" def test_multiple_points(self): - # request = Request(Point(["step", "level"], [[3, 10], [3, 12]]), Select("date", ["2000-01-01"])) request = Request( Union(["step", "level"], Point(["step", "level"], [[3, 10]]), Point(["step", "level"], [[3, 12]])), Select("date", ["2000-01-01"]), diff --git a/tests/test_point_union.py b/tests/test_point_union.py index 32a01272b..b9e8552d5 100644 --- a/tests/test_point_union.py +++ b/tests/test_point_union.py @@ -42,7 +42,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index bf4bb17a6..a1a6c7440 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -1,25 +1,35 @@ -from polytope_feature.shapes import Box, Select, Span -from polytope_feature.polytope import Polytope, Request -from polytope_feature.engine.qubed_slicer import QubedSlicer -from polytope_feature.datacube.backends.qubed import QubedDatacube -from polytope_feature.datacube.backends.fdb import FDBDatacube +import time + +import pandas as pd +import pygribjump as gj import pytest -from qubed import Qube import requests -from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis -# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice -from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta -import pandas as pd -from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper +from qubed import Qube -from polytope_feature.shapes import ConvexPolytope -import time -import pygribjump as gj +from polytope_feature.datacube.backends.fdb import FDBDatacube +from polytope_feature.datacube.backends.qubed import QubedDatacube +from polytope_feature.datacube.datacube_axis import ( + FloatDatacubeAxis, + PandasTimedeltaDatacubeAxis, + PandasTimestampDatacubeAxis, + UnsliceableDatacubeAxis, +) +from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import ( + NestedHealpixGridMapper, +) + +# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import ( + TypeChangeStrToTimedelta, + TypeChangeStrToTimestamp, +) from polytope_feature.engine.hullslicer import HullSlicer +from polytope_feature.engine.qubed_slicer import QubedSlicer +from polytope_feature.polytope import Polytope, Request +from polytope_feature.shapes import Box, ConvexPolytope, Select, Span def find_relevant_subcube_from_request(request, qube_url): - # NOTE: final url we want is like: # "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?class=d1&dataset=climate-dt" @@ -36,8 +46,9 @@ def find_relevant_subcube_from_request(request, qube_url): return qube_url -fdb_tree = Qube.from_json(requests.get( - "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) +fdb_tree = Qube.from_json( + requests.get("https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate-dt.json").json() +) combi_polytopes = [ @@ -45,39 +56,40 @@ def find_relevant_subcube_from_request(request, qube_url): ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), ConvexPolytope(["resolution"], [["high"]]), ConvexPolytope(["type"], [["fc"]]), - ConvexPolytope(["model"], [['ifs-nemo']]), + ConvexPolytope(["model"], [["ifs-nemo"]]), ConvexPolytope(["stream"], [["clte"]]), ConvexPolytope(["realization"], ["1"]), - ConvexPolytope(["expver"], [['0001']]), - ConvexPolytope(["experiment"], [['ssp3-7.0']]), + ConvexPolytope(["expver"], [["0001"]]), + ConvexPolytope(["experiment"], [["ssp3-7.0"]]), ConvexPolytope(["generation"], [["1"]]), ConvexPolytope(["levtype"], [["sfc"]]), ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]), ] # TODO: add lat and lon axes -datacube_axes = {"param": UnsliceableDatacubeAxis(), - "time": PandasTimedeltaDatacubeAxis(), - "resolution": UnsliceableDatacubeAxis(), - "type": UnsliceableDatacubeAxis(), - "model": UnsliceableDatacubeAxis(), - "stream": UnsliceableDatacubeAxis(), - "realization": UnsliceableDatacubeAxis(), - "expver": UnsliceableDatacubeAxis(), - "experiment": UnsliceableDatacubeAxis(), - "generation": UnsliceableDatacubeAxis(), - "levtype": UnsliceableDatacubeAxis(), - "activity": UnsliceableDatacubeAxis(), - "dataset": UnsliceableDatacubeAxis(), - "class": UnsliceableDatacubeAxis(), - "date": PandasTimestampDatacubeAxis(), - # "latitude": FloatDatacubeAxis(), - # "longitude": FloatDatacubeAxis() - } +datacube_axes = { + "param": UnsliceableDatacubeAxis(), + "time": PandasTimedeltaDatacubeAxis(), + "resolution": UnsliceableDatacubeAxis(), + "type": UnsliceableDatacubeAxis(), + "model": UnsliceableDatacubeAxis(), + "stream": UnsliceableDatacubeAxis(), + "realization": UnsliceableDatacubeAxis(), + "expver": UnsliceableDatacubeAxis(), + "experiment": UnsliceableDatacubeAxis(), + "generation": UnsliceableDatacubeAxis(), + "levtype": UnsliceableDatacubeAxis(), + "activity": UnsliceableDatacubeAxis(), + "dataset": UnsliceableDatacubeAxis(), + "class": UnsliceableDatacubeAxis(), + "date": PandasTimestampDatacubeAxis(), + # "latitude": FloatDatacubeAxis(), + # "longitude": FloatDatacubeAxis() +} time_val = pd.Timedelta(hours=0, minutes=0) date_val = pd.Timestamp("20300101T000000") @@ -87,7 +99,7 @@ def find_relevant_subcube_from_request(request, qube_url): datacube_transformations = { "time": TypeChangeStrToTimedelta("time", time_val), "date": TypeChangeStrToTimestamp("date", date_val), - "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) + "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024), } @@ -142,22 +154,23 @@ def find_relevant_subcube_from_request(request, qube_url): # # "latitude": FloatDatacubeAxis(), # # "longitude": FloatDatacubeAxis() # } - "datacube_axes": {"param": "UnsliceableDatacubeAxis", - "time": "PandasTimedeltaDatacubeAxis", - "resolution": "UnsliceableDatacubeAxis", - "type": "UnsliceableDatacubeAxis", - "model": "UnsliceableDatacubeAxis", - "stream": "UnsliceableDatacubeAxis", - "realization": "UnsliceableDatacubeAxis", - "expver": "UnsliceableDatacubeAxis", - "experiment": "UnsliceableDatacubeAxis", - "generation": "UnsliceableDatacubeAxis", - "levtype": "UnsliceableDatacubeAxis", - "activity": "UnsliceableDatacubeAxis", - "dataset": "UnsliceableDatacubeAxis", - "class": "UnsliceableDatacubeAxis", - "date": "PandasTimestampDatacubeAxis", - } + "datacube_axes": { + "param": "UnsliceableDatacubeAxis", + "time": "PandasTimedeltaDatacubeAxis", + "resolution": "UnsliceableDatacubeAxis", + "type": "UnsliceableDatacubeAxis", + "model": "UnsliceableDatacubeAxis", + "stream": "UnsliceableDatacubeAxis", + "realization": "UnsliceableDatacubeAxis", + "expver": "UnsliceableDatacubeAxis", + "experiment": "UnsliceableDatacubeAxis", + "generation": "UnsliceableDatacubeAxis", + "levtype": "UnsliceableDatacubeAxis", + "activity": "UnsliceableDatacubeAxis", + "dataset": "UnsliceableDatacubeAxis", + "class": "UnsliceableDatacubeAxis", + "date": "PandasTimestampDatacubeAxis", + }, } # request = Request( @@ -173,24 +186,25 @@ def find_relevant_subcube_from_request(request, qube_url): # Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), # ) -request = Request(ConvexPolytope(["param"], [["164"]]), - ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), - ConvexPolytope(["resolution"], [["high"]]), - ConvexPolytope(["type"], [["fc"]]), - ConvexPolytope(["model"], [['ifs-nemo']]), - ConvexPolytope(["stream"], [["clte"]]), - ConvexPolytope(["realization"], ["1"]), - ConvexPolytope(["expver"], [['0001']]), - ConvexPolytope(["experiment"], [['ssp3-7.0']]), - ConvexPolytope(["generation"], [["1"]]), - ConvexPolytope(["levtype"], [["sfc"]]), - ConvexPolytope(["activity"], [["scenariomip"]]), - ConvexPolytope(["dataset"], [["climate-dt"]]), - ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) - # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) - ) +request = Request( + ConvexPolytope(["param"], [[164]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), + ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["model"], [["ifs-nemo"]]), + ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], [[1]]), + ConvexPolytope(["expver"], [["0001"]]), + ConvexPolytope(["experiment"], [["ssp3-7.0"]]), + ConvexPolytope(["generation"], [[1]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["activity"], [["scenariomip"]]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) + # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) +) qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) slicer = QubedSlicer() @@ -201,7 +215,8 @@ def find_relevant_subcube_from_request(request, qube_url): options=options, ) time1 = time.time() -result = self_API.retrieve(request) +# result = self_API.retrieve(request) +result = self_API.slice(request.polytopes()) time2 = time.time() print(result) @@ -249,32 +264,35 @@ def find_relevant_subcube_from_request(request, qube_url): fdbdatacube = gj.GribJump() slicer = HullSlicer() -self_API = Polytope( - datacube=fdbdatacube, - engine=slicer, - options=options, -) +# self_API = Polytope( +# datacube=fdbdatacube, +# engine=slicer, +# options=options, +# ) -request = Request(ConvexPolytope(["param"], [["164"]]), - ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), - ConvexPolytope(["resolution"], [["high"]]), - ConvexPolytope(["type"], [["fc"]]), - ConvexPolytope(["model"], [['ifs-nemo']]), - ConvexPolytope(["stream"], [["clte"]]), - ConvexPolytope(["realization"], ["1"]), - ConvexPolytope(["expver"], [['0001']]), - ConvexPolytope(["experiment"], [['ssp3-7.0']]), - ConvexPolytope(["generation"], [["1"]]), - ConvexPolytope(["levtype"], [["sfc"]]), - ConvexPolytope(["activity"], [["scenariomip"]]), - ConvexPolytope(["dataset"], [["climate-dt"]]), - ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])) +request = Request( + ConvexPolytope(["param"], [["164"]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), + ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["model"], [["ifs-nemo"]]), + ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], ["1"]), + ConvexPolytope(["expver"], [["0001"]]), + ConvexPolytope(["experiment"], [["ssp3-7.0"]]), + ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["activity"], [["scenariomip"]]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]), +) time3 = time.time() -result = self_API.retrieve(request) +# result = self_API.retrieve(request) +# result = self_API.slice(request.polytopes()) time4 = time.time() print("TIME EXTRACTING USING GJ NORMAL") diff --git a/tests/test_qubed_extraction_engine_w_metadata.py b/tests/test_qubed_extraction_engine_w_metadata.py new file mode 100644 index 000000000..7156303df --- /dev/null +++ b/tests/test_qubed_extraction_engine_w_metadata.py @@ -0,0 +1,280 @@ +import time + +import pandas as pd +import pygribjump as gj +import pytest +import requests +from qubed import Qube + +from polytope_feature.datacube.backends.fdb import FDBDatacube +from polytope_feature.datacube.backends.qubed import QubedDatacube +from polytope_feature.datacube.datacube_axis import ( + FloatDatacubeAxis, + PandasTimedeltaDatacubeAxis, + PandasTimestampDatacubeAxis, + UnsliceableDatacubeAxis, +) +from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import ( + NestedHealpixGridMapper, +) + +# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import ( + TypeChangeStrToTimedelta, + TypeChangeStrToTimestamp, +) +from polytope_feature.engine.hullslicer import HullSlicer +from polytope_feature.engine.qubed_slicer import QubedSlicer +from polytope_feature.polytope import Polytope, Request +from polytope_feature.shapes import Box, ConvexPolytope, Select, Span + + +def find_relevant_subcube_from_request(request, qube_url): + # NOTE: final url we want is like: + # "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?class=d1&dataset=climate-dt" + + for shape in request.shapes: + if isinstance(shape, Select): + qube_url += shape.axis + "=" + for i, val in enumerate(shape.values): + qube_url += str(val) + if i < len(shape.values) - 1: + qube_url += "," + qube_url += "&" + # TODO: remove last unnecessary & + qube_url = qube_url[:-1] + return qube_url + + +fdb_tree = Qube.from_json( + requests.get( + "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/extremes-dt_with_metadata.json" + ).json() +) + + +# combi_polytopes = [ +# ConvexPolytope(["param"], [["164"]]), +# ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), +# ConvexPolytope(["resolution"], [["high"]]), +# ConvexPolytope(["type"], [["fc"]]), +# ConvexPolytope(["model"], [['ifs-nemo']]), +# ConvexPolytope(["stream"], [["clte"]]), +# ConvexPolytope(["realization"], ["1"]), +# ConvexPolytope(["expver"], [['0001']]), +# ConvexPolytope(["experiment"], [['ssp3-7.0']]), +# ConvexPolytope(["generation"], [["1"]]), +# ConvexPolytope(["levtype"], [["sfc"]]), +# ConvexPolytope(["activity"], [["scenariomip"]]), +# ConvexPolytope(["dataset"], [["climate-dt"]]), +# ConvexPolytope(["class"], [["d1"]]), +# ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), +# ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) +# ] + +# TODO: add lat and lon axes +datacube_axes = { + "param": UnsliceableDatacubeAxis(), + "time": PandasTimedeltaDatacubeAxis(), + "resolution": UnsliceableDatacubeAxis(), + "type": UnsliceableDatacubeAxis(), + "model": UnsliceableDatacubeAxis(), + "stream": UnsliceableDatacubeAxis(), + "realization": UnsliceableDatacubeAxis(), + "expver": UnsliceableDatacubeAxis(), + "experiment": UnsliceableDatacubeAxis(), + "generation": UnsliceableDatacubeAxis(), + "levtype": UnsliceableDatacubeAxis(), + "activity": UnsliceableDatacubeAxis(), + "dataset": UnsliceableDatacubeAxis(), + "class": UnsliceableDatacubeAxis(), + "date": PandasTimestampDatacubeAxis(), + # "latitude": FloatDatacubeAxis(), + # "longitude": FloatDatacubeAxis() +} + +time_val = pd.Timedelta(hours=0, minutes=0) +date_val = pd.Timestamp("20300101T000000") + + +# TODO: add grid axis transformation +datacube_transformations = { + "time": TypeChangeStrToTimedelta("time", time_val), + "date": TypeChangeStrToTimestamp("date", date_val), + "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024), +} + + +options = { + "axis_config": [ + {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, + # { + # "axis_name": "date", + # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], + # }, + {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, + {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, + { + "axis_name": "values", + "transformations": [ + {"name": "mapper", "type": "octahedral", "resolution": 2560, "axes": ["latitude", "longitude"]} + ], + }, + {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, + {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, + ], + "compressed_axes_config": [ + "longitude", + "latitude", + "levtype", + "step", + "date", + "domain", + "expver", + "param", + "class", + "stream", + "type", + ], + "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"}, + "datacube_axes": { + "param": "UnsliceableDatacubeAxis", + "time": "PandasTimedeltaDatacubeAxis", + "type": "UnsliceableDatacubeAxis", + "stream": "UnsliceableDatacubeAxis", + "expver": "UnsliceableDatacubeAxis", + "levtype": "UnsliceableDatacubeAxis", + "dataset": "UnsliceableDatacubeAxis", + "class": "UnsliceableDatacubeAxis", + "date": "PandasTimestampDatacubeAxis", + "step": "IntDatacubeAxis", + }, +} + +# request = Request( +# Select("step", [0]), +# Select("levtype", ["sfc"]), +# Select("date", [pd.Timestamp("20230625T120000")]), +# Select("domain", ["g"]), +# Select("expver", ["0001"]), +# Select("param", ["167"]), +# Select("class", ["od"]), +# Select("stream", ["oper"]), +# Select("type", ["an"]), +# Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), +# ) + +request = Request( + ConvexPolytope(["param"], [["167"]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["stream"], [["oper"]]), + ConvexPolytope(["expver"], [["0001"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["dataset"], [["extremes-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["step"], [[1]]), + ConvexPolytope(["date"], [[pd.Timestamp("20240407")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) + # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) +) +# print(fdb_tree) +qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) +slicer = QubedSlicer() +self_API = Polytope( + # datacube=qubeddatacube, + datacube=fdb_tree, + engine=slicer, + options=options, +) +time1 = time.time() +result = self_API.retrieve(request) +# result = self_API.slice(request.polytopes()) +time2 = time.time() + +print(result) + +print("TIME EXTRACTING USING QUBED") +print(time2 - time1) + +# USING NORMAL GJ + + +options = { + "axis_config": [ + {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, + # { + # "axis_name": "date", + # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], + # }, + {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, + {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, + { + "axis_name": "values", + "transformations": [ + {"name": "mapper", "type": "healpix_nested", "resolution": 1024, "axes": ["latitude", "longitude"]} + ], + }, + {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, + {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, + ], + "compressed_axes_config": [ + "longitude", + # "latitude", + # "levtype", + # "step", + # "date", + # "domain", + # "expver", + # "param", + # "class", + # "stream", + # "type", + ], + "pre_path": {"class": "d1", "model": "ifs-nemo", "resolution": "high"}, +} + +fdbdatacube = gj.GribJump() +slicer = HullSlicer() +# self_API = Polytope( +# datacube=fdbdatacube, +# engine=slicer, +# options=options, +# ) + + +request = Request( + ConvexPolytope(["param"], [["164"]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), + ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["model"], [["ifs-nemo"]]), + ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], ["1"]), + ConvexPolytope(["expver"], [["0001"]]), + ConvexPolytope(["experiment"], [["ssp3-7.0"]]), + ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["activity"], [["scenariomip"]]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]), +) + +time3 = time.time() +# result = self_API.retrieve(request) +# result = self_API.slice(request.polytopes()) +time4 = time.time() + +print("TIME EXTRACTING USING GJ NORMAL") +print(time4 - time3) + + +# print(result) + +# print(result.leaves) + +# sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) diff --git a/tests/test_qubed_extraction_service.py b/tests/test_qubed_extraction_service.py index d303b03d7..c13f71cc1 100644 --- a/tests/test_qubed_extraction_service.py +++ b/tests/test_qubed_extraction_service.py @@ -1,25 +1,35 @@ -from polytope_feature.shapes import Box, Select, Span -from polytope_feature.polytope import Polytope, Request -from polytope_feature.engine.qubed_slicer import QubedSlicer -from polytope_feature.datacube.backends.qubed import QubedDatacube -from polytope_feature.datacube.backends.fdb import FDBDatacube +import time + +import pandas as pd +import pygribjump as gj import pytest -from qubed import Qube import requests -from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis -# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice -from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta -import pandas as pd -from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper +from qubed import Qube -from polytope_feature.shapes import ConvexPolytope -import time -import pygribjump as gj +from polytope_feature.datacube.backends.fdb import FDBDatacube +from polytope_feature.datacube.backends.qubed import QubedDatacube +from polytope_feature.datacube.datacube_axis import ( + FloatDatacubeAxis, + PandasTimedeltaDatacubeAxis, + PandasTimestampDatacubeAxis, + UnsliceableDatacubeAxis, +) +from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import ( + NestedHealpixGridMapper, +) + +# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import ( + TypeChangeStrToTimedelta, + TypeChangeStrToTimestamp, +) from polytope_feature.engine.hullslicer import HullSlicer +from polytope_feature.engine.qubed_slicer import QubedSlicer +from polytope_feature.polytope import Polytope, Request +from polytope_feature.shapes import Box, ConvexPolytope, Select, Span def find_relevant_subcube_from_request(request, qube_url): - # NOTE: final url we want is like: # "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?class=d1&dataset=climate-dt" @@ -43,8 +53,9 @@ def get_fdb_tree(request): return fdb_tree -fdb_tree = Qube.from_json(requests.get( - "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) +fdb_tree = Qube.from_json( + requests.get("https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate-dt.json").json() +) # print(fdb_tree) @@ -69,24 +80,25 @@ def get_fdb_tree(request): # ] # TODO: add lat and lon axes -datacube_axes = {"param": UnsliceableDatacubeAxis(), - "time": PandasTimedeltaDatacubeAxis(), - "resolution": UnsliceableDatacubeAxis(), - "type": UnsliceableDatacubeAxis(), - "model": UnsliceableDatacubeAxis(), - "stream": UnsliceableDatacubeAxis(), - "realization": UnsliceableDatacubeAxis(), - "expver": UnsliceableDatacubeAxis(), - "experiment": UnsliceableDatacubeAxis(), - "generation": UnsliceableDatacubeAxis(), - "levtype": UnsliceableDatacubeAxis(), - "activity": UnsliceableDatacubeAxis(), - "dataset": UnsliceableDatacubeAxis(), - "class": UnsliceableDatacubeAxis(), - "date": PandasTimestampDatacubeAxis(), - # "latitude": FloatDatacubeAxis(), - # "longitude": FloatDatacubeAxis() - } +datacube_axes = { + "param": UnsliceableDatacubeAxis(), + "time": PandasTimedeltaDatacubeAxis(), + "resolution": UnsliceableDatacubeAxis(), + "type": UnsliceableDatacubeAxis(), + "model": UnsliceableDatacubeAxis(), + "stream": UnsliceableDatacubeAxis(), + "realization": UnsliceableDatacubeAxis(), + "expver": UnsliceableDatacubeAxis(), + "experiment": UnsliceableDatacubeAxis(), + "generation": UnsliceableDatacubeAxis(), + "levtype": UnsliceableDatacubeAxis(), + "activity": UnsliceableDatacubeAxis(), + "dataset": UnsliceableDatacubeAxis(), + "class": UnsliceableDatacubeAxis(), + "date": PandasTimestampDatacubeAxis(), + # "latitude": FloatDatacubeAxis(), + # "longitude": FloatDatacubeAxis() +} time_val = pd.Timedelta(hours=0, minutes=0) date_val = pd.Timestamp("20300101T000000") @@ -96,13 +108,16 @@ def get_fdb_tree(request): datacube_transformations = { "time": TypeChangeStrToTimedelta("time", time_val), "date": TypeChangeStrToTimestamp("date", date_val), - "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) + "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024), } options = { "axis_config": [ {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "expver", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "realization", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "generation", "transformations": [{"name": "type_change", "type": "int"}]}, {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, # { # "axis_name": "date", @@ -133,22 +148,26 @@ def get_fdb_tree(request): "type", ], "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"}, - "datacube_axes": {"param": "UnsliceableDatacubeAxis", - "time": "PandasTimedeltaDatacubeAxis", - "resolution": "UnsliceableDatacubeAxis", - "type": "UnsliceableDatacubeAxis", - "model": "UnsliceableDatacubeAxis", - "stream": "UnsliceableDatacubeAxis", - "realization": "UnsliceableDatacubeAxis", - "expver": "UnsliceableDatacubeAxis", - "experiment": "UnsliceableDatacubeAxis", - "generation": "UnsliceableDatacubeAxis", - "levtype": "UnsliceableDatacubeAxis", - "activity": "UnsliceableDatacubeAxis", - "dataset": "UnsliceableDatacubeAxis", - "class": "UnsliceableDatacubeAxis", - "date": "PandasTimestampDatacubeAxis", - } + "datacube_axes": { + "param": "UnsliceableDatacubeAxis", + "time": "PandasTimedeltaDatacubeAxis", + "resolution": "UnsliceableDatacubeAxis", + "type": "UnsliceableDatacubeAxis", + "model": "UnsliceableDatacubeAxis", + "stream": "UnsliceableDatacubeAxis", + # "realization": "UnsliceableDatacubeAxis", + "realization": "IntDatacubeAxis", + # "expver": "UnsliceableDatacubeAxis", + "expver": "IntDatacubeAxis", + "experiment": "UnsliceableDatacubeAxis", + # "generation": "UnsliceableDatacubeAxis", + "generation": "IntDatacubeAxis", + "levtype": "UnsliceableDatacubeAxis", + "activity": "UnsliceableDatacubeAxis", + "dataset": "UnsliceableDatacubeAxis", + "class": "UnsliceableDatacubeAxis", + "date": "PandasTimestampDatacubeAxis", + }, } # request = Request( @@ -166,27 +185,35 @@ def get_fdb_tree(request): request = Request( # ConvexPolytope(["param"], [["164"]]), - Select("param", ["164"]), + # Select("param", ["164"]), + Select("param", ["165"]), ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), ConvexPolytope(["resolution"], [["high"]]), ConvexPolytope(["type"], [["fc"]]), # ConvexPolytope(["model"], [['ifs-nemo']]), - Select("model", ["ifs-nemo"]), + # Select("model", ["ifs-nemo"]), + Select("model", ["icon"]), Select("stream", ["clte"]), # ConvexPolytope(["stream"], [["clte"]]), ConvexPolytope(["realization"], ["1"]), - ConvexPolytope(["expver"], [['0001']]), - ConvexPolytope(["experiment"], [['ssp3-7.0']]), - ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["expver"], [["0001"]]), + ConvexPolytope(["experiment"], [["ssp3-7.0"]]), + # ConvexPolytope(["generation"], [["1"]]), + Select("generation", [1]), ConvexPolytope(["levtype"], [["sfc"]]), # ConvexPolytope(["activity"], [["scenariomip"]]), Select("activity", ["scenariomip"]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]])) + # ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), + ConvexPolytope(["date"], [[pd.Timestamp("20200908")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]]), +) -fdb_tree = get_fdb_tree(request) +# fdb_tree = get_fdb_tree(request) +path_to_qube = "../qubed/" +full_qube_path = path_to_qube + "tests/example_qubes/climate_dt_with_metadata.json" +fdb_tree = Qube.load(full_qube_path) print("HERE WE HAVE THE FDB TREE") print(fdb_tree) diff --git a/tests/test_reduced_ll_grid.py b/tests/test_reduced_ll_grid.py index b0a3535c4..3b8f3eb3c 100644 --- a/tests/test_reduced_ll_grid.py +++ b/tests/test_reduced_ll_grid.py @@ -12,7 +12,7 @@ class TestReducedLatLonGrid: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/wave.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/wave.grib" download_test_data(nexus_url, "wave.grib") self.options = { "axis_config": [ diff --git a/tests/test_regular_grid.py b/tests/test_regular_grid.py index e094b3bca..2c9cc1c78 100644 --- a/tests/test_regular_grid.py +++ b/tests/test_regular_grid.py @@ -12,7 +12,7 @@ class TestRegularGrid: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/era5-levels-members.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/era5-levels-members.grib" download_test_data(nexus_url, "era5-levels-members.grib") self.options = { "axis_config": [ diff --git a/tests/test_regular_reduced_grid.py b/tests/test_regular_reduced_grid.py index 08d64fba6..7de5d8584 100644 --- a/tests/test_regular_reduced_grid.py +++ b/tests/test_regular_reduced_grid.py @@ -52,13 +52,13 @@ def setup_method(self, method): # Testing different shapes @pytest.mark.fdb - def test_fdb_datacube(self): + def test_fdb_datacube(self, downloaded_data_test_files): import pygribjump as gj request = Request( Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20231102T000000")]), + Select("date", [pd.Timestamp("20240103T0000")]), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), @@ -87,7 +87,11 @@ def test_fdb_datacube(self): eccodes_lats = [] eccodes_lons = [] tol = 1e-12 - f = open("tests/data/aifs_data_from_fdb.grib", "rb") + aifs_file = "" + for f in downloaded_data_test_files: + if "aifs_data_from_fdb.grib" in f.name: + aifs_file = f + f = open(aifs_file, "rb") messages = [] message = codes_grib_new_from_file(f) messages.append(message) diff --git a/tests/test_request_tree.py b/tests/test_request_tree.py index 038599724..99e02eb7c 100644 --- a/tests/test_request_tree.py +++ b/tests/test_request_tree.py @@ -177,69 +177,6 @@ def test_remove_branch(self): child1.remove_branch() assert root_node.children == SortedList([child2]) - # def test_intersect(self): - # axis1 = IntDatacubeAxis() - # axis2 = IntDatacubeAxis() - # axis3 = IntDatacubeAxis() - # axis4 = IntDatacubeAxis() - # axis5 = IntDatacubeAxis() - # axis6 = IntDatacubeAxis() - # axis1.name = "grandchild1" - # axis2.name = "grandchild2" - # axis3.name = "grandchild3" - # axis4.name = "grandchild4" - # axis5.name = "child1" - # axis6.name = "child2" - # grandchild1_1 = TensorIndexTree(axis=axis1) - # grandchild2_1 = TensorIndexTree(axis=axis2) - # grandchild3_1 = TensorIndexTree(axis=axis3) - # grandchild4_1 = TensorIndexTree(axis=axis4) - # child1_1 = TensorIndexTree(axis=axis5) - # child1_1.add_child(grandchild1_1) - # child1_1.add_child(grandchild2_1) - # child2_1 = TensorIndexTree(axis=axis6) - # child2_1.add_child(grandchild3_1) - # child2_1.add_child(grandchild4_1) - # root_node1 = TensorIndexTree() - # root_node1.add_child(child1_1) - # root_node1.add_child(child2_1) - - # axis1 = IntDatacubeAxis() - # axis2 = IntDatacubeAxis() - # axis3 = IntDatacubeAxis() - # axis4 = IntDatacubeAxis() - # axis5 = IntDatacubeAxis() - # axis6 = IntDatacubeAxis() - # axis1.name = "grandchild1" - # axis2.name = "grandchild5" - # axis3.name = "grandchild6" - # axis4.name = "grandchild7" - # axis5.name = "child1" - # axis6.name = "child3" - # grandchild1_2 = TensorIndexTree(axis=axis1) - # grandchild2_2 = TensorIndexTree(axis=axis2) - # grandchild3_2 = TensorIndexTree(axis=axis3) - # grandchild4_2 = TensorIndexTree(axis=axis4) - # child1_2 = TensorIndexTree(axis=axis5) - # child1_2.add_child(grandchild1_2) - # child1_2.add_child(grandchild2_2) - # child2_2 = TensorIndexTree(axis=axis6) - # child2_2.add_child(grandchild3_2) - # child2_2.add_child(grandchild4_2) - # root_node2 = TensorIndexTree() - # root_node2.add_child(child1_2) - # root_node2.add_child(child2_2) - # root_node1.pprint() - # root_node2.pprint() - - # root_node1.intersect(root_node2) - - # root_node1.pprint() - # assert len(root_node1.children) == 1 - # assert list(root_node1.children)[0].axis.name == "child1" - # assert len(list(root_node1.children)[0].children) == 1 - # assert list(list(root_node1.children)[0].children)[0].axis.name == "grandchild1" - def test_flatten(self): axis1 = IntDatacubeAxis() axis2 = IntDatacubeAxis() diff --git a/tests/test_shapes.py b/tests/test_shapes.py index ac730905a..5b5b7b921 100644 --- a/tests/test_shapes.py +++ b/tests/test_shapes.py @@ -66,25 +66,24 @@ def test_all_mapper_cyclic(self): "type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"], - "md5_hash": "5ea6378bf5e2904f565ef7221da63a09", } ], }, {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, ], - "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"}, + "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "type": "fc", "stream": "oper"}, } self.fdbdatacube = gj.GribJump() self.slicer = HullSlicer() request = Request( - Select("step", [11]), + Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20230710T120000")]), + Select("date", [pd.Timestamp("20240103T0000")]), Select("domain", ["g"]), Select("expver", ["0001"]), - Select("param", ["151130"]), + Select("param", ["167"]), Select("class", ["od"]), Select("stream", ["oper"]), Select("type", ["fc"]), diff --git a/tests/test_slice_date_range_fdb.py b/tests/test_slice_date_range_fdb.py index dae81cb22..f66695f87 100644 --- a/tests/test_slice_date_range_fdb.py +++ b/tests/test_slice_date_range_fdb.py @@ -42,7 +42,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_slice_date_range_fdb_v2.py b/tests/test_slice_date_range_fdb_v2.py index fcacf6b7c..a4454d4e8 100644 --- a/tests/test_slice_date_range_fdb_v2.py +++ b/tests/test_slice_date_range_fdb_v2.py @@ -45,7 +45,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb @pytest.mark.skip(reason="gribjump problem") def test_fdb_datacube(self): diff --git a/tests/test_slice_fdb_box.py b/tests/test_slice_fdb_box.py index a21910dfd..4918317b1 100644 --- a/tests/test_slice_fdb_box.py +++ b/tests/test_slice_fdb_box.py @@ -46,7 +46,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.skip(reason="optimisation test") @pytest.mark.fdb def test_fdb_datacube(self): diff --git a/tests/test_slicer_era5.py b/tests/test_slicer_era5.py index 9f11bcef8..cf5223fbf 100644 --- a/tests/test_slicer_era5.py +++ b/tests/test_slicer_era5.py @@ -10,7 +10,7 @@ class TestSlicingEra5Data: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/era5-levels-members.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/era5-levels-members.grib" download_test_data(nexus_url, "era5-levels-members.grib") ds = data.from_source("file", "./tests/data/era5-levels-members.grib") diff --git a/tests/test_snapping_real_data.py b/tests/test_snapping_real_data.py index 9b5950742..aaddd688d 100644 --- a/tests/test_snapping_real_data.py +++ b/tests/test_snapping_real_data.py @@ -13,7 +13,7 @@ class TestSlicingEra5Data: def setup_method(self, method): - nexus_url = "https://get.ecmwf.int/test-data/polytope/test-data/era5-levels-members.grib" + nexus_url = "https://sites.ecmwf.int/repository/polytope/test-data/era5-levels-members.grib" download_test_data(nexus_url, "era5-levels-members.grib") ds = data.from_source("file", "./tests/data/era5-levels-members.grib") diff --git a/tests/test_tree_protobuf_encoding_fdb.py b/tests/test_tree_protobuf_encoding_fdb.py index 2e288f0d6..99b8f169c 100644 --- a/tests/test_tree_protobuf_encoding_fdb.py +++ b/tests/test_tree_protobuf_encoding_fdb.py @@ -75,7 +75,6 @@ def test_encoding(self): fdb_datacube = self.API.datacube fdb_datacube.prep_tree_encoding(result) encoded_bytes = encode_tree(result) - # write_encoded_tree_to_file(encoded_bytes) decoded_tree = decode_tree(fdb_datacube, encoded_bytes) decoded_tree.pprint() assert decoded_tree.leaves[0].result_size == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] diff --git a/tests/test_union_gj.py b/tests/test_union_gj.py index 9182fc360..98083873b 100644 --- a/tests/test_union_gj.py +++ b/tests/test_union_gj.py @@ -42,7 +42,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb def test_fdb_datacube(self): import pygribjump as gj diff --git a/tests/test_union_point_box.py b/tests/test_union_point_box.py index ba280bc02..2057ff91a 100644 --- a/tests/test_union_point_box.py +++ b/tests/test_union_point_box.py @@ -42,7 +42,6 @@ def setup_method(self, method): ], } - # Testing different shapes @pytest.mark.fdb @pytest.mark.skip(reason="point and box are not same dimensions") def test_fdb_datacube(self): diff --git a/tests/test_wave_spectra_data.py b/tests/test_wave_spectra_data.py index 6835fee4c..2fe4aa208 100644 --- a/tests/test_wave_spectra_data.py +++ b/tests/test_wave_spectra_data.py @@ -45,7 +45,7 @@ def setup_method(self, method): self.slicer = HullSlicer() @pytest.mark.fdb - def test_healpix_grid(self): + def test_healpix_grid(self, downloaded_data_test_files): import pygribjump as gj self.fdb_datacube = gj.GribJump() @@ -80,6 +80,10 @@ def test_healpix_grid(self): eccodes_lats = [] tol = 1e-8 leaves = result.leaves + wave_file = "" + for f in downloaded_data_test_files: + if "wave_spectra.grib" in f.name: + wave_file = f for i, leaf in enumerate(leaves): cubepath = leaf.flatten() lat = cubepath["latitude"][0] @@ -87,7 +91,7 @@ def test_healpix_grid(self): for j, lon in enumerate(new_lons): lats.append(lat) lons.append(lon) - nearest_points = find_nearest_latlon("./tests/data/wave_spectra.grib", lat, lon) + nearest_points = find_nearest_latlon(wave_file, lat, lon) eccodes_lat = nearest_points[0][0]["lat"] eccodes_lon = nearest_points[0][0]["lon"] assert eccodes_lat - tol <= lat