diff --git a/.github/ci-config.yml b/.github/ci-config.yml
new file mode 100644
index 000000000..416a96d14
--- /dev/null
+++ b/.github/ci-config.yml
@@ -0,0 +1,13 @@
+dependencies: |
+ ecmwf/ecbuild
+ MathisRosenhauer/libaec@refs/tags/v1.1.3
+ ecmwf/eccodes
+ ecmwf/eckit@develop
+ ecmwf/metkit
+ ecmwf/fdb
+ ecmwf/gribjump
+dependency_cmake_options: |
+ ecmwf/gribjump: "-DENABLE_FDB_BUILD_TOOLS=ON"
+dependency_branch: develop
+parallelism_factor: 8
+self_build: false
\ No newline at end of file
diff --git a/.github/ci-hpc-config.yml b/.github/ci-hpc-config.yml
new file mode 100644
index 000000000..98a9a857c
--- /dev/null
+++ b/.github/ci-hpc-config.yml
@@ -0,0 +1,21 @@
+build:
+ python: "3.10"
+ modules:
+ - ninja
+ - aec
+ dependencies:
+ - ecmwf/ecbuild@develop
+ - ecmwf/eccodes@develop
+ - ecmwf/eckit@develop
+ - ecmwf/metkit@develop
+ - ecmwf/fdb@develop
+ - ecmwf/gribjump@develop
+ dependency_cmake_options:
+ - "ecmwf/gribjump: '-DENABLE_FDB_BUILD_TOOLS=ON'"
+ - "ecmwf/fdb: '-DENABLE_LUSTRE=OFF'"
+ parallel: 64
+ env:
+ - ECCODES_SAMPLES_PATH=$ECCODES_DIR/share/eccodes/samples
+ - ECCODES_DEFINITION_PATH=$ECCODES_DIR/share/eccodes/definitions
+ pytest_cmd: |
+ python -m pytest -m 'not fdb' tests --cov=./ --cov-report=xml
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
new file mode 100644
index 000000000..ae4568c03
--- /dev/null
+++ b/.github/workflows/cd.yml
@@ -0,0 +1,11 @@
+name: cd
+
+on:
+ push:
+ tags:
+ - '**'
+
+jobs:
+ deploy:
+ uses: ecmwf-actions/reusable-workflows/.github/workflows/create-package.yml@v2
+ secrets: inherit
\ No newline at end of file
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 24a5887c9..81fb4033d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -1,145 +1,45 @@
name: ci
+
on:
# Trigger the workflow on push to master or develop, except tag creation
push:
branches:
- - 'main'
- - 'develop'
+ - "main"
+ - "develop"
+ tags-ignore:
+ - "**"
+
# Trigger the workflow on pull request
pull_request: ~
+
# Trigger the workflow manually
workflow_dispatch: ~
+
# Trigger after public PR approved for CI
pull_request_target:
types: [labeled]
- release:
- types: [created]
-jobs:
- qa:
- name: qa
- runs-on: ubuntu-20.04
- steps:
- - name: Checkout Repository
- uses: actions/checkout@v3
- with:
- repository: ${{ inputs.repository }}
- ref: ${{ inputs.ref }}
- - name: Setup Python
- uses: actions/setup-python@v4
- with:
- python-version: ${{ inputs.python_version }}
-
- - name: Install Python Dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install black flake8 isort
- - name: Check isort
- run: isort --check .
-
- - name: Check black
- run: black --check .
-
- - name: Check flake8
- run: flake8 .
- setup:
- name: setup
- runs-on: ubuntu-20.04
- outputs:
- matrix: ${{ steps.set-matrix.outputs.matrix }}
- inputs: ${{ steps.prepare-inputs.outputs.inputs }}
- inputs-for-ubuntu: ${{ steps.prepare-inputs.outputs.inputs-for-ubuntu }}
- steps:
- - name: Set Matrix
- id: set-matrix
- shell: bash -eux {0}
- run: |
- MATRIX=$(cat << 'EOS'
- name:
- - gnu-11@ubuntu-22.04
- - clang-14@ubuntu-22.04
- include:
- - name: gnu-11@ubuntu-22.04
- os: ubuntu-22.04
- compiler: gnu-11
- compiler_cc: gcc-11
- compiler_cxx: g++-11
- compiler_fc: gfortran-11
- - name: clang-14@ubuntu-22.04
- os: ubuntu-22.04
- compiler: clang-14
- compiler_cc: clang-14
- compiler_cxx: clang++-14
- compiler_fc: gfortran-11
- # Xcode compiler requires empty environment variables, so we pass null (~) here
- EOS
- )
- SKIP_MATRIX_JOBS=$(cat << 'EOS'
- ${{ inputs.skip_matrix_jobs }}
- EOS
- )
- SELECT_NAME_COND="1 != 1"
- SELECT_INCLUDE_COND="1 != 1"
- for skip_job in $SKIP_MATRIX_JOBS; do SELECT_NAME_COND="$SELECT_NAME_COND or . == \"$skip_job\""; SELECT_INCLUDE_COND="$SELECT_INCLUDE_COND or .name == \"$skip_job\""; done
- echo matrix=$(echo "$MATRIX" | yq eval "del(.name[] | select($SELECT_NAME_COND)) | del(.include[] | select($SELECT_INCLUDE_COND))" --output-format json --indent 0 -) >> $GITHUB_OUTPUT
- - name: Prepare build-package Inputs
- id: prepare-inputs
- shell: bash -eux {0}
- run: |
- echo inputs=$(echo "${{ inputs.build_package_inputs || '{}' }}" | yq eval '.' --output-format json --indent 0 -) >> $GITHUB_OUTPUT
- echo inputs-for-ubuntu=$(echo "${{ inputs.build_package_inputs || '{}' }}" | yq eval '. * {"os":"ubuntu-20.04","compiler":"gnu-10","compiler_cc":"gcc-10","compiler_cxx":"g++-10","compiler_fc":"gfortran-10"}' --output-format json --indent 0 -) >> $GITHUB_OUTPUT
- test:
- name: test
- needs:
- - qa
- - setup
- strategy:
- fail-fast: false
- matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
- runs-on: ${{ matrix.os }}
- steps:
- - uses: actions/checkout@v4
- - name: Install eccodes and Dependencies
- id: install-dependencies
- uses: ecmwf-actions/build-package@v2
- with:
- self_build: false
- dependencies: |
- ecmwf/ecbuild@develop
- MathisRosenhauer/libaec@master
- ecmwf/eccodes@develop
-
- - name: Setup Python
- uses: actions/setup-python@v4
- with:
- python-version: ${{ inputs.python_version }}
-
- - name: Install Python Dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install pytest pytest-cov
- python -m pip install -r requirements.txt
- python -m pip install -r ./tests/requirements_test.txt
-
- - name: Verify Source Distribution
- shell: bash -eux {0}
- run: |
- python setup.py sdist
- python -m pip install dist/*
- - name: Run Tests with Repository Code
- env:
- LD_LIBRARY_PATH: ${{ steps.install-dependencies.outputs.lib_path }}
- shell: bash -eux {0}
- run: |
- DYLD_LIBRARY_PATH=${{ env.LD_LIBRARY_PATH }} python -m pytest -m "not fdb" tests --cov=./ --cov-report=xml
- python -m coverage report
+jobs:
+ # Run CI including downstream packages on self-hosted runners
+ downstream-ci:
+ name: downstream-ci
+ if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
+ uses: ecmwf/downstream-ci/.github/workflows/downstream-ci.yml@main
+ with:
+ polytope: ecmwf/polytope@${{ github.event.pull_request.head.sha || github.sha }}
+ codecov_upload: true
+ python_qa: true
+ secrets: inherit
+
+ downstream-ci-hpc:
+ name: downstream-ci-hpc
+ if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
+ uses: ecmwf/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main
+ with:
+ polytope: ecmwf/polytope@${{ github.event.pull_request.head.sha || github.sha }}
+ secrets: inherit
- - name: Upload coverage to Codecov
- uses: codecov/codecov-action@v4
- with:
- files: coverage.xml
deploy:
- needs: test
if: ${{ github.event_name == 'release' }}
name: Upload to Pypi
runs-on: ubuntu-latest
@@ -159,4 +59,4 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: |
python -m build
- twine upload dist/*
\ No newline at end of file
+ twine upload dist/*
diff --git a/.github/workflows/label-public-pr.yml b/.github/workflows/label-public-pr.yml
new file mode 100644
index 000000000..52f3b87c5
--- /dev/null
+++ b/.github/workflows/label-public-pr.yml
@@ -0,0 +1,10 @@
+# Manage labels of pull requests that originate from forks
+name: label-public-pr
+
+on:
+ pull_request_target:
+ types: [opened, synchronize]
+
+jobs:
+ label:
+ uses: ecmwf-actions/reusable-workflows/.github/workflows/label-pr.yml@v2
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3bf9b6198..dc2893e55 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,4 +29,8 @@ polytope_feature/_version.py
**.lock
**/target
**/*venv*/
-test_w_qubed
\ No newline at end of file
+test_w_qubed
+performance_plots_qubed
+_version.py
+*.DS_Store
+*.nc
\ No newline at end of file
diff --git a/codecov.yml b/codecov.yml
index f911473e5..41335c532 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -6,4 +6,10 @@ ignore:
- "**/test*"
- "**/pb2*"
- "**/fdb*" # ignore fdb backend which we can't test in CI yet
- - "polytope/datacube/backends/fdb.py" # ignore fdb backend which we can't test in CI yet
\ No newline at end of file
+ - "polytope/datacube/backends/fdb.py" # ignore fdb backend which we can't test in CI yet
+
+coverage:
+ status:
+ patch:
+ default:
+ enabled: false # disables patch check
\ No newline at end of file
diff --git a/docs/Service/Examples/OpenData/od_boundingbox.ipynb b/docs/Service/Examples/OpenData/od_boundingbox_example.ipynb
similarity index 100%
rename from docs/Service/Examples/OpenData/od_boundingbox.ipynb
rename to docs/Service/Examples/OpenData/od_boundingbox_example.ipynb
diff --git a/docs/Service/Examples/examples.md b/docs/Service/Examples/examples.md
index 69d427998..2179a4103 100644
--- a/docs/Service/Examples/examples.md
+++ b/docs/Service/Examples/examples.md
@@ -12,11 +12,11 @@
## Open Data
* Timeseries
-* Timeseries
-* Timeseries
-* Timeseries
-* Timeseries
-* Timeseries
+* Polygon
+* Vertical Profile
+* Bounding Box
+* Trajectory
+* Country Cut-Out
For examples of Polytope Feature Extraction on Destination Earth Digital Twin Data please visit the following Github Repo: https://github.com/destination-earth-digital-twins/polytope-examples
diff --git a/polytope_feature/datacube/backends/datacube.py b/polytope_feature/datacube/backends/datacube.py
index 1d9d94de6..8dc7a8ed2 100644
--- a/polytope_feature/datacube/backends/datacube.py
+++ b/polytope_feature/datacube/backends/datacube.py
@@ -145,7 +145,6 @@ def get_indices(self, path: DatacubePath, axis, lower, upper, method=None):
e.g. returns integer discrete points between two floats
"""
path = self.fit_path(path)
- print(path)
indexes = axis.find_indexes(path, self)
idx_between = axis.find_indices_between(indexes, lower, upper, self, method)
@@ -170,7 +169,15 @@ def remap_path(self, path: DatacubePath):
return path
@staticmethod
- def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alternative_axes=[], datacube_axes={}, context=None):
+ def create(
+ datacube,
+ config={},
+ axis_options={},
+ compressed_axes_options=[],
+ alternative_axes=[],
+ datacube_axes={},
+ context=None,
+ ):
# TODO: get the configs as None for pre-determined value and change them to empty dictionary inside the function
if type(datacube).__name__ == "DataArray":
from .xarray import XArrayDatacube
@@ -185,13 +192,15 @@ def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alt
)
return fdbdatacube
if type(datacube).__name__ == "Qube":
- from .qubed import QubedDatacube
from ..datacube_axis import _str_to_axis
+ from .qubed import QubedDatacube
+
actual_datacube_axes = {}
for key, value in datacube_axes.items():
actual_datacube_axes[key] = _str_to_axis[value]
- qubed_datacube = QubedDatacube(datacube, actual_datacube_axes,
- config, axis_options, compressed_axes_options, alternative_axes, context)
+ qubed_datacube = QubedDatacube(
+ datacube, actual_datacube_axes, config, axis_options, compressed_axes_options, alternative_axes, context
+ )
return qubed_datacube
def check_branching_axes(self, request):
diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py
index b471569cb..67aa28dae 100644
--- a/polytope_feature/datacube/backends/qubed.py
+++ b/polytope_feature/datacube/backends/qubed.py
@@ -2,19 +2,26 @@
import operator
from copy import deepcopy
from itertools import product
-from ...utility.exceptions import BadGridError, BadRequestError, GribJumpNoIndexError
-from ...utility.geometry import nearest_pt
+
+import numpy as np
import pygribjump as pygj
from qubed.value_types import QEnum
-import numpy as np
+from ...utility.exceptions import BadGridError, GribJumpNoIndexError
+from ...utility.geometry import nearest_pt
from .datacube import Datacube, TensorIndexTree
class QubedDatacube(Datacube):
-
def __init__(
- self, q, datacube_axes, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None
+ self,
+ q,
+ datacube_axes,
+ config=None,
+ axis_options=None,
+ compressed_axes_options=[],
+ alternative_axes=[],
+ context=None,
):
if config is None:
config = {}
@@ -134,9 +141,26 @@ def get_indices(self, path, path_node, axis, lower, upper, method=None):
logging.debug(f"For axis {axis.name} between {lower} and {upper}, found indices {idx_between}")
- return idx_between
+ if path_node:
+ indexes = [indexes.index(item) for item in idx_between]
+ else:
+ indexes = None
+
+ return (idx_between, indexes)
def get(self, requests, context=None):
+ """
+ We have a compressed tree of requests, which we need to decompress completely with its metadata indexes.
+ BUT the last two axes, we would like to "ignore" in the decompression and instead, we create grid index ranges from them.
+ WHILE we decompress, we need to keep some kind of map from decompressed request + grid index ranges to corresponding tree node.
+ This mapping will map potentially several decompressed request + grid index ranges tuples to the same tree nodes.
+
+ ADDED DIFFICULTY: the grid index ranges MUST be ordered (which is not guaranteed from the tree) so we need to sort them, while also sorting the tree nodes so that they match up.
+
+ UNTIL NOW, we had a map of compressed request + grid index ranges tuples to tree nodes.
+ We could just add a map of compressed request -> list of decompressed request + associated metadata
+ """
+
if context is None:
context = {}
if len(requests.children) == 0:
@@ -149,6 +173,9 @@ def get(self, requests, context=None):
complete_list_complete_uncompressed_requests = []
complete_fdb_decoding_info = []
for j, compressed_request in enumerate(fdb_requests):
+ compressed_metadata = compressed_request[2]
+
+ # TODO: get uncompressed metadata for each leaf
uncompressed_request = {}
# Need to determine the possible decompressed requests
@@ -159,13 +186,36 @@ def get(self, requests, context=None):
interm_branch_tuple_values.append(compressed_request[0][key])
request_combis = product(*interm_branch_tuple_values)
+ index_combis_raw = list(product(*[range(len(lst)) for lst in interm_branch_tuple_values]))
+ index_combis = [(0, *comb) for comb in index_combis_raw]
+
# Need to extract the possible requests and add them to the right nodes
- for combi in request_combis:
- uncompressed_request = {}
- for i, key in enumerate(compressed_request[0].keys()):
- uncompressed_request[key] = combi[i]
- complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash)
- complete_list_complete_uncompressed_requests.append(complete_uncompressed_request)
+
+ def find_metadata(metadata_idx):
+ metadata = {}
+ for k, vs in compressed_metadata.items():
+ metadata_depth = len(vs.shape)
+ relevant_metadata_dxs = metadata_idx[:metadata_depth]
+ metadata[k] = vs[relevant_metadata_dxs]
+ return metadata
+
+ for i, combi in enumerate(request_combis):
+ metadata_idxs = index_combis[i]
+ actual_metadata = find_metadata(metadata_idxs)
+
+ def flatten_metadata(value):
+ return value[0] if isinstance(value, np.ndarray) else value
+
+ path = flatten_metadata(actual_metadata["path"])
+ scheme = flatten_metadata(actual_metadata["scheme"])
+ offset = flatten_metadata(actual_metadata["offset"])
+ host = flatten_metadata(actual_metadata["host"])
+ port = flatten_metadata(actual_metadata["port"])
+
+ gj_extraction_request = pygj.PathExtractionRequest(
+ path, scheme, offset, host, port, compressed_request[1], self.grid_md5_hash)
+
+ complete_list_complete_uncompressed_requests.append(gj_extraction_request)
complete_fdb_decoding_info.append(fdb_requests_decoding_info[j])
if logging.root.level <= logging.DEBUG:
@@ -173,7 +223,7 @@ def get(self, requests, context=None):
logging.debug("The requests we give GribJump are: %s", printed_list_to_gj)
logging.info("Requests given to GribJump extract for %s", context)
try:
- output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context)
+ output_values = self.gj.extract_from_paths(complete_list_complete_uncompressed_requests, context)
except Exception as e:
if "BadValue: Grid hash mismatch" in str(e):
logging.info("Error is: %s", e)
@@ -196,10 +246,15 @@ def get_fdb_requests(
fdb_requests=[],
fdb_requests_decoding_info=[],
leaf_path=None,
+ leaf_metadata=None,
):
+ # TODO: collect leaf metadata from qube here too
if leaf_path is None:
leaf_path = {}
+ if leaf_metadata is None:
+ leaf_metadata = {}
+
# First when request node is root, go to its children
if requests.key == "root":
logging.debug("Looking for data for the tree")
@@ -217,7 +272,7 @@ def get_fdb_requests(
if requests.key == "time":
new_vals = []
for val in key_value_path[requests.key]:
- new_vals.append(val[7:9]+val[10:12])
+ new_vals.append(val[7:9] + val[10:12])
key_value_path[requests.key] = new_vals
if requests.key == "date":
new_vals = []
@@ -225,6 +280,8 @@ def get_fdb_requests(
new_vals.append(val[:4] + val[5:7] + val[8:10])
key_value_path[requests.key] = new_vals
leaf_path.update(key_value_path)
+ # TODO: in the leaf metadata, try to instead store mapping leaf_path -> list(individual metadata dicts for each uncompressed value of tree)
+ leaf_metadata.update(requests.metadata)
if len(requests.children[0].children[0].children) == 0:
# find the fdb_requests and associated nodes to which to add results
(path, current_start_idxs, fdb_node_ranges, lat_length) = self.get_2nd_last_values(requests, leaf_path)
@@ -233,13 +290,14 @@ def get_fdb_requests(
sorted_request_ranges,
fdb_node_ranges,
) = self.sort_fdb_request_ranges(current_start_idxs, lat_length, fdb_node_ranges)
- fdb_requests.append((path, sorted_request_ranges))
+ fdb_requests.append((path, sorted_request_ranges, deepcopy(leaf_metadata)))
+ # TODO: did we need to deepcopy the leaf metadata??
fdb_requests_decoding_info.append((original_indices, fdb_node_ranges))
# Otherwise remap the path for this key and iterate again over children
else:
for c in requests.children:
- self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path)
+ self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path, leaf_metadata)
def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs):
# seen_indices = set()
@@ -337,8 +395,6 @@ def get_2nd_last_values(self, requests, leaf_path=None):
current_start_idx = deepcopy(current_start_idxs[i])
fdb_range_nodes = deepcopy(fdb_node_ranges[i])
key_value_path = {lat_child.key: list(lat_child.values)}
- # print("WHAT ARE THE DATACUBE AXES NOW??")
- # print(self._axes.keys())
ax = self._axes[lat_child.key]
(key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
key_value_path, leaf_path, self.unwanted_path
@@ -379,13 +435,13 @@ def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info):
if len(request_output_values.values) == 0:
# If we are here, no data was found for this path in the fdb
none_array = [None] * len(n.values)
- if n.data.metadata.get("result", None) is None:
- n.data.metadata["result"] = []
- n.data.metadata["result"].extend(none_array)
+ if n.metadata.get("result", None) is None:
+ n.metadata["result"] = []
+ n.metadata["result"].extend(none_array)
else:
- if n.data.metadata.get("result", None) is None:
- n.data.metadata["result"] = []
- n.data.metadata["result"].extend(request_output_values.values[i])
+ if n.metadata.get("result", None) is None:
+ n.metadata["result"] = []
+ n.metadata["result"].extend(request_output_values.values[i])
def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges):
(new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges(
@@ -403,8 +459,11 @@ def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges
sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1])
original_indices_idx, interm_start_idx = zip(*sorted_list)
for interm_fdb_nodes_obj in interm_fdb_nodes[j]:
- interm_fdb_nodes_obj.data.values = QEnum(tuple([list(interm_fdb_nodes_obj.values)[k]
- for k in original_indices_idx]))
+ # interm_fdb_nodes_obj.data.values = QEnum(tuple([list(interm_fdb_nodes_obj.values)[k]
+ # for k in original_indices_idx]))
+ interm_fdb_nodes_obj.values = QEnum(
+ tuple([list(interm_fdb_nodes_obj.values)[k] for k in original_indices_idx])
+ )
if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx):
current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1)
interm_request_ranges.append(current_request_ranges)
diff --git a/polytope_feature/datacube/backends/qubed_fdb.py b/polytope_feature/datacube/backends/qubed_fdb.py
index dcd8b31fa..c3b339b29 100644
--- a/polytope_feature/datacube/backends/qubed_fdb.py
+++ b/polytope_feature/datacube/backends/qubed_fdb.py
@@ -3,11 +3,12 @@
from copy import deepcopy
from itertools import product
+import requests
+from qubed import Qube
+
from ...utility.exceptions import BadGridError, BadRequestError
from ...utility.geometry import nearest_pt
from .datacube import Datacube, TensorIndexTree
-from qubed import Qube
-import requests
class FDBDatacube(Datacube):
@@ -30,8 +31,11 @@ def __init__(
# Find values in the level 3 FDB datacube
self.gj = gj
- self.fdb_tree = Qube.from_json(requests.get(
- "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json())
+ self.fdb_tree = Qube.from_json(
+ requests.get(
+ "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json"
+ ).json()
+ )
if len(alternative_axes) == 0:
logging.info("Find GribJump axes for %s", context)
diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py
index 06fbe1953..b2032a5b6 100644
--- a/polytope_feature/datacube/datacube_axis.py
+++ b/polytope_feature/datacube/datacube_axis.py
@@ -355,9 +355,10 @@ def serialize(self, value):
float: FloatDatacubeAxis(),
}
-_str_to_axis = {"FloatDatacubeAxis": FloatDatacubeAxis(),
- "IntDatacubeAxis": IntDatacubeAxis(),
- "UnsliceableDatacubeAxis": UnsliceableDatacubeAxis(),
- "PandasTimedeltaDatacubeAxis": PandasTimedeltaDatacubeAxis(),
- "PandasTimestampDatacubeAxis": PandasTimestampDatacubeAxis(),
- }
+_str_to_axis = {
+ "FloatDatacubeAxis": FloatDatacubeAxis(),
+ "IntDatacubeAxis": IntDatacubeAxis(),
+ "UnsliceableDatacubeAxis": UnsliceableDatacubeAxis(),
+ "PandasTimedeltaDatacubeAxis": PandasTimedeltaDatacubeAxis(),
+ "PandasTimestampDatacubeAxis": PandasTimestampDatacubeAxis(),
+}
diff --git a/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py b/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py
index e06c8fd38..80d971188 100644
--- a/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py
+++ b/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py
@@ -105,9 +105,6 @@ def unmap_path_key(self, key_value_path, leaf_path, unwanted_path, axis):
unwanted_path[axis.name] = unwanted_val
if axis.name == self._mapped_axes()[1]:
first_val = unwanted_path[self._mapped_axes()[0]]
- # unmapped_idx = [self.unmap(first_val, (val,)) for val in value]
- # print("AND HERE??")
- # print(values)
unmapped_idx = self.unmap(first_val, values)
leaf_path.pop(self._mapped_axes()[0], None)
key_value_path.pop(axis.name)
diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py
index ba9a7b339..e69de29bb 100644
--- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py
+++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/__init__.py
@@ -1,5 +0,0 @@
-from .healpix import *
-from .local_regular import *
-from .octahedral import *
-from .reduced_ll import *
-from .regular import *
diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py
index fe695469a..54335151e 100644
--- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py
+++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py
@@ -1,6 +1,8 @@
import bisect
import math
+import numpy as np
+
from ..datacube_mappers import DatacubeMapper
@@ -12,6 +14,7 @@ def __init__(self, base_axis, mapped_axes, resolution, md5_hash=None, local_area
self._resolution = resolution
self._axis_reversed = {mapped_axes[0]: True, mapped_axes[1]: False}
self._first_axis_vals = self.first_axis_vals()
+ self._first_axis_vals_np_rounded = -np.round(np.array(self._first_axis_vals), decimals=8)
self.compressed_grid_axes = [self._mapped_axes[1]]
self.Nside = self._resolution
self.k = int(math.log2(self.Nside))
@@ -76,12 +79,10 @@ def HEALPix_nj(self, i):
def HEALPix_longitudes(self, i):
Nj = self.HEALPix_nj(i)
step = 360.0 / Nj
- start = (
- step / 2.0 if i < self._resolution or 3 * self._resolution - 1 < i or (i + self._resolution) % 2 else 0.0
+ start = np.where(
+ (i < self._resolution) | (3 * self._resolution - 1 < i) | ((i + self._resolution) % 2 == 1), step / 2.0, 0.0
)
-
- longitudes = [start + n * step for n in range(Nj)]
-
+ longitudes = start + np.arange(Nj) * step
return longitudes
def map_second_axis(self, first_val, lower, upper):
@@ -90,138 +91,124 @@ def map_second_axis(self, first_val, lower, upper):
return return_vals
def axes_idx_to_healpix_idx(self, first_idx, second_idx):
- idx = 0
- for i in range(self._resolution - 1):
- if i != first_idx:
- idx += 4 * (i + 1)
- else:
- idx += second_idx
- return idx
- for i in range(self._resolution - 1, 3 * self._resolution):
- if i != first_idx:
- idx += 4 * self._resolution
- else:
- idx += second_idx
- return idx
- for i in range(3 * self._resolution, 4 * self._resolution - 1):
- if i != first_idx:
- idx += 4 * (4 * self._resolution - 1 - i + 1)
- else:
- idx += second_idx
- return idx
-
- def find_second_idx(self, first_val, second_val):
- tol = 1e-10
- second_axis_vals = self.second_axis_vals(first_val)
- second_idx = bisect.bisect_left(second_axis_vals, second_val - tol)
- return second_idx
-
- def unmap_first_val_to_start_line_idx(self, first_val):
- tol = 1e-8
- first_val = [i for i in self._first_axis_vals if first_val - tol <= i <= first_val + tol][0]
- first_idx = self._first_axis_vals.index(first_val)
- idx = 0
- for i in range(self._resolution - 1):
- if i != first_idx:
- idx += 4 * (i + 1)
- else:
- return idx
- for i in range(self._resolution - 1, 3 * self._resolution):
- if i != first_idx:
- idx += 4 * self._resolution
- else:
- return idx
- for i in range(3 * self._resolution, 4 * self._resolution - 1):
- if i != first_idx:
- idx += 4 * (4 * self._resolution - 1 - i + 1)
- else:
- return idx
-
- def unmap(self, first_val, second_vals, unmapped_idx=None):
- tol = 1e-8
- first_value = [i for i in self._first_axis_vals if first_val[0] - tol <= i <= first_val[0] + tol][0]
- first_idx = self._first_axis_vals.index(first_value)
- healpix_idxs = []
- for second_val in second_vals:
- second_val = [i for i in self.second_axis_vals(first_val) if second_val - tol <= i <= second_val + tol][0]
- second_idx = self.second_axis_vals(first_val).index(second_val)
- healpix_index = self.axes_idx_to_healpix_idx(first_idx, second_idx)
- # TODO: here do conversion of ring to nested healpix representation before returning
- healpix_index = self.ring_to_nested(healpix_index)
- healpix_idxs.append(healpix_index)
- return healpix_idxs
+ res = self._resolution
+ sum1 = 2 * (res - 1) * res
+ sum2 = 2 * (((res - 1) * res) - ((4 * res - 1 - first_idx) * (4 * res - first_idx)))
+
+ if first_idx < res - 1:
+ return (2 * first_idx * (first_idx + 1)) + second_idx
+ elif first_idx < 3 * res:
+ return sum1 + (first_idx - (res - 1)) * (4 * res) + second_idx
+ else:
+ return sum1 + (2 * res + 1) * (4 * res) + sum2 + second_idx
+
+ def unmap(self, first_val, second_vals):
+ # Convert to NumPy array for fast computation
+ idx = np.searchsorted(self._first_axis_vals_np_rounded, -np.round(first_val[0], decimals=8))
+ if idx >= len(self._first_axis_vals_np_rounded):
+ return None
+ second_axis_vals = np.round(np.array(self.second_axis_vals_from_idx(idx)), decimals=8)
+ second_vals = np.round(np.array(second_vals), decimals=8)
+ second_idxs = np.searchsorted(second_axis_vals, second_vals)
+ valid_mask = second_idxs < len(second_axis_vals)
+ if not np.all(valid_mask):
+ return None
+ healpix_idxs = [self.axes_idx_to_healpix_idx(idx, sec_idx) for sec_idx in second_idxs]
+ return self.ring_to_nested(np.asarray(healpix_idxs)).tolist()
def div_03(self, a, b):
- t = 1 if a >= (b << 1) else 0
+ """Vectorized version of div_03"""
+ t = np.where(a >= (b << 1), 1, 0)
a -= t * (b << 1)
- return (t << 1) + (1 if a >= b else 0)
+ return (t << 1) + np.where(a >= b, 1, 0)
def pll(self, f):
- pll_values = [1, 3, 5, 7, 0, 2, 4, 6, 1, 3, 5, 7]
+ """Vectorized lookup for PLL values"""
+ pll_values = np.array([1, 3, 5, 7, 0, 2, 4, 6, 1, 3, 5, 7])
return pll_values[f]
def to_nest(self, f, ring, Nring, phi, shift):
- r = int(((2 + (f >> 2)) << self.k) - ring - 1)
- p = int(2 * phi - self.pll(f) * Nring - shift - 1)
- if p >= 2 * self.Nside:
- p -= 8 * self.Nside
- i = int((r + p)) >> 1
- j = int((r - p)) >> 1
+ """Vectorized to_nest conversion"""
+ r = ((2 + (f >> 2)) << self.k) - ring - 1
+ p = 2 * phi - self.pll(f) * Nring - shift - 1
+ p = np.where(p >= 2 * self.Nside, p - 8 * self.Nside, p)
+ i = (r + p) >> 1
+ j = (r - p) >> 1
return self.fij_to_nest(f, i, j, self.k)
def fij_to_nest(self, f, i, j, k):
- return (f << (2 * k)) + self.nest_encode_bits(i) + (self.nest_encode_bits(j) << 1)
+ """Vectorized nest encoding"""
+ return (
+ # (f.astype(np.uint64) << np.uint64(2 * k))
+ (f.astype(object) << (2 * k))
+ + self.nest_encode_bits(i)
+ + (self.nest_encode_bits(j).astype(np.uint64) << np.uint64(1))
+ )
def nest_encode_bits(self, i):
- __masks = [
- 0x00000000FFFFFFFF,
- 0x0000FFFF0000FFFF,
- 0x00FF00FF00FF00FF,
- 0x0F0F0F0F0F0F0F0F,
- 0x3333333333333333,
- 0x5555555555555555,
- ]
- i = int(i)
- b = i & __masks[0]
- b = (b ^ (b << 16)) & __masks[1]
- b = (b ^ (b << 8)) & __masks[2]
- b = (b ^ (b << 4)) & __masks[3]
- b = (b ^ (b << 2)) & __masks[4]
- b = (b ^ (b << 1)) & __masks[5]
- return b
-
- def ring_to_nested(self, idx):
- if idx < self.Ncap:
- # North polar cap
- Nring = (1 + self.int_sqrt(2 * idx + 1)) >> 1
- phi = 1 + idx - 2 * Nring * (Nring - 1)
- f = self.div_03(phi - 1, Nring)
- return self.to_nest(f, Nring, Nring, phi, 0)
-
- if self.Npix - self.Ncap <= idx:
- # South polar cap
- Nring = (1 + self.int_sqrt(2 * self.Npix - 2 * idx - 1)) >> 1
- phi = 1 + idx + 2 * Nring * (Nring - 1) + 4 * Nring - self.Npix
- ring = 4 * self.Nside - Nring # (from South pole)
- f = self.div_03(phi - 1, Nring) + 8
- return self.to_nest(f, ring, Nring, phi, 0)
- else:
- # Equatorial belt
- ip = idx - self.Ncap
- tmp = ip >> (self.k + 2)
-
- phi = ip - tmp * 4 * self.Nside + 1
- ring = tmp + self.Nside
+ """Vectorized bit manipulation for HEALPix indexing"""
+ __masks = np.array(
+ [
+ 0x00000000FFFFFFFF,
+ 0x0000FFFF0000FFFF,
+ 0x00FF00FF00FF00FF,
+ 0x0F0F0F0F0F0F0F0F,
+ 0x3333333333333333,
+ 0x5555555555555555,
+ ],
+ dtype=np.uint64,
+ )
- ifm = 1 + ((phi - 1 - ((1 + tmp) >> 1)) >> self.k)
- ifp = 1 + ((phi - 1 - ((1 - tmp + 2 * self.Nside) >> 1)) >> self.k)
- f = (ifp | 4) if ifp == ifm else (ifp if ifp < ifm else (ifm + 8))
+ b = i.astype(np.uint64) & __masks[0]
+ b = (b ^ (b << np.uint64(16))) & __masks[1]
+ b = (b ^ (b << np.uint64(8))) & __masks[2]
+ b = (b ^ (b << np.uint64(4))) & __masks[3]
+ b = (b ^ (b << np.uint64(2))) & __masks[4]
+ b = (b ^ (b << np.uint64(1))) & __masks[5]
+ return b
- return self.to_nest(f, ring, self.Nside, phi, ring & 1)
+ def int_sqrt(self, x):
+ """Efficient integer square root for arrays"""
+ return np.sqrt(x + 0.5).astype(int)
- def int_sqrt(self, i):
- return int(math.sqrt(i + 0.5))
+ def ring_to_nested(self, idx):
+ """Vectorized ring_to_nested conversion"""
+ # idx = np.asarray(idx) # Ensure input is an array
+
+ north_mask = idx < self.Ncap
+ south_mask = self.Npix - self.Ncap <= idx
+
+ # North polar cap
+ Nring_north = (1 + self.int_sqrt(2 * idx + 1)) >> 1
+ phi_north = 1 + idx - 2 * Nring_north * (Nring_north - 1)
+ f_north = self.div_03(phi_north - 1, Nring_north)
+ nested_north = self.to_nest(f_north, Nring_north, Nring_north, phi_north, 0)
+
+ # South polar cap
+ Nring_south = (1 + self.int_sqrt(2 * self.Npix - 2 * idx - 1)) >> 1
+ phi_south = 1 + idx + 2 * Nring_south * (Nring_south - 1) + 4 * Nring_south - self.Npix
+ ring_south = 4 * self.Nside - Nring_south
+ f_south = self.div_03(phi_south - 1, Nring_south) + 8
+ nested_south = self.to_nest(f_south, ring_south, Nring_south, phi_south, 0)
+
+ # Equatorial belt
+ ip = idx - self.Ncap
+ tmp = ip >> (self.k + 2)
+
+ phi_equatorial = ip - tmp * 4 * self.Nside + 1
+ ring_equatorial = tmp + self.Nside
+
+ ifm = 1 + ((phi_equatorial - 1 - ((1 + tmp) >> 1)) >> self.k)
+ ifp = 1 + ((phi_equatorial - 1 - ((1 - tmp + 2 * self.Nside) >> 1)) >> self.k)
+ f_equatorial = np.where(ifp == ifm, ifp | 4, np.where(ifp < ifm, ifp, ifm + 8))
+
+ nested_equatorial = self.to_nest(f_equatorial, ring_equatorial, self.Nside, phi_equatorial, ring_equatorial & 1)
+ nested_result = np.empty_like(idx) # Preallocate array for performance
+ nested_result[north_mask] = nested_north[north_mask]
+ nested_result[south_mask] = nested_south[south_mask]
+ nested_result[~(north_mask | south_mask)] = nested_equatorial[~(north_mask | south_mask)]
+ return nested_result
# md5 grid hash in form {resolution : hash}
diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py
index 5319a4ade..18619dc06 100644
--- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py
+++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/reduced_ll.py
@@ -5128,4 +5128,4 @@ def unmap(self, first_val, second_vals):
# md5 grid hash in form {resolution : hash}
-_md5_hash = {3601: "386742a2dd1201b67f2d19ed421353ea"}
+_md5_hash = {3601: "225e56fb2fdee272ca226dc265d08a0a"}
diff --git a/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py b/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py
index 4d3232910..7a4da9bc4 100644
--- a/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py
+++ b/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py
@@ -1,6 +1,7 @@
+import numpy as np
+
from ....utility.list_tools import bisect_left_cmp, bisect_right_cmp
from ..datacube_transformations import DatacubeAxisTransformation
-import numpy as np
class DatacubeAxisReverse(DatacubeAxisTransformation):
diff --git a/polytope_feature/engine/engine.py b/polytope_feature/engine/engine.py
index 653a6a2b8..ad8d84b09 100644
--- a/polytope_feature/engine/engine.py
+++ b/polytope_feature/engine/engine.py
@@ -1,12 +1,10 @@
+import math
from typing import List
from ..datacube.backends.datacube import Datacube
-from ..datacube.tensor_index_tree import TensorIndexTree
-from ..shapes import ConvexPolytope, Product
-
from ..datacube.datacube_axis import UnsliceableDatacubeAxis
+from ..shapes import ConvexPolytope, Product
from ..utility.list_tools import unique
-import math
class Engine:
@@ -78,8 +76,5 @@ def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]):
self.find_compressed_axes(datacube, polytopes)
self.remove_compressed_axis_in_union(polytopes)
self.pre_process_polytopes(datacube, polytopes)
- # assert isinstance(datacube, QubedDatacube)
tree = self.build_tree(polytopes, datacube)
- print("WHAT DOES THE TREE LOOK LIKE??")
- print(tree)
return tree
diff --git a/polytope_feature/engine/hullslicer.py b/polytope_feature/engine/hullslicer.py
index e326ee536..cb51cbe7c 100644
--- a/polytope_feature/engine/hullslicer.py
+++ b/polytope_feature/engine/hullslicer.py
@@ -1,11 +1,7 @@
from copy import copy
-from typing import List
-
-from ..datacube.backends.datacube import Datacube
from ..datacube.tensor_index_tree import TensorIndexTree
-from ..shapes import ConvexPolytope
-from ..utility.combinatorics import group, tensor_product, find_polytope_combinations
+from ..utility.combinatorics import find_polytope_combinations, group, tensor_product
from ..utility.exceptions import UnsliceableShapeError
from .engine import Engine
from .slicing_tools import slice
diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py
index b71f9f2f2..fac8e9611 100644
--- a/polytope_feature/engine/qubed_slicer.py
+++ b/polytope_feature/engine/qubed_slicer.py
@@ -1,18 +1,18 @@
-
+import numpy as np
+import pandas as pd
from qubed import Qube
from qubed.value_types import QEnum
-from qubed.set_operations import union
-from .slicing_tools import slice
-from ..datacube.backends.qubed import QubedDatacube
-from .engine import Engine
-import pandas as pd
+
from ..datacube.datacube_axis import UnsliceableDatacubeAxis
from ..datacube.transformations.datacube_mappers.datacube_mappers import DatacubeMapper
-from ..shapes import ConvexPolytope, Product
-from ..utility.combinatorics import group, tensor_product, find_polytopes_on_axis, find_polytope_combinations
-from typing import List
-
-from ..datacube.backends.datacube import Datacube
+from ..utility.combinatorics import (
+ find_polytope_combinations,
+ find_polytopes_on_axis,
+ group,
+ tensor_product,
+)
+from .engine import Engine
+from .slicing_tools import slice
class QubedSlicer(Engine):
@@ -27,14 +27,16 @@ def find_datacube_vals():
def find_values_between(self, polytope, ax, node, datacube, lower, upper, path=None):
if isinstance(ax, UnsliceableDatacubeAxis):
- return [v for v in node.values if lower <= v <= upper]
+ filtered = [(i, v) for i, v in enumerate(node.values) if lower <= v <= upper]
+ indices, values = zip(*filtered) if filtered else ([], [])
+ return values, indices
tol = ax.tol
lower = ax.from_float(lower - tol)
upper = ax.from_float(upper + tol)
method = polytope.method
- values = datacube.get_indices(path, node, ax, lower, upper, method)
- return values
+ values, indexes = datacube.get_indices(path, node, ax, lower, upper, method)
+ return values, indexes
def get_sliced_polys(self, found_vals, ax, child_name, poly, slice_axis_idx):
sliced_polys = []
@@ -48,8 +50,7 @@ def get_sliced_polys(self, found_vals, ax, child_name, poly, slice_axis_idx):
def find_children_polytopes(self, polytopes, poly, sliced_polys):
child_polytopes = [p for p in polytopes if p != poly]
- child_polytopes.extend(
- [sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None])
+ child_polytopes.extend([sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None])
return child_polytopes
def find_new_vals(self, found_vals, ax):
@@ -63,41 +64,75 @@ def find_new_vals(self, found_vals, ax):
new_found_vals.append(found_val)
return new_found_vals
- def build_branch(self, real_uncompressed_axis, found_vals, sliced_polys, polytopes, poly, child, datacube, datacube_transformations, ax):
+ def build_branch(
+ self,
+ real_uncompressed_axis,
+ found_vals,
+ sliced_polys,
+ polytopes,
+ poly,
+ child,
+ datacube,
+ datacube_transformations,
+ ax,
+ idxs=None,
+ metadata_idx_stack=None,
+ ):
final_children_and_vals = []
if real_uncompressed_axis:
for i, found_val in enumerate(found_vals):
- sliced_polys_ = [sliced_polys[i]]
+ if i < len(sliced_polys):
+ sliced_polys_ = [sliced_polys[i]]
+ else:
+ sliced_polys_ = sliced_polys
child_polytopes = self.find_children_polytopes(polytopes, poly, sliced_polys_)
- children = self._slice(child, child_polytopes, datacube, datacube_transformations)
+ if idxs:
+ current_metadata_idx_stack = metadata_idx_stack + [[idxs[i]]]
+ children = self._slice(child, child_polytopes, datacube, datacube_transformations,
+ current_metadata_idx_stack)
# If this node used to have children but now has none due to filtering, skip it.
if child.children and not children:
continue
new_found_vals = self.find_new_vals([found_val], ax)
- final_children_and_vals.append((children, new_found_vals))
+
+ if idxs:
+ request_child_val = (children, new_found_vals, current_metadata_idx_stack)
+ else:
+ request_child_val = (children, new_found_vals)
+ final_children_and_vals.append(request_child_val)
else:
# if it's compressed, then can add all found values in a single node
child_polytopes = self.find_children_polytopes(polytopes, poly, sliced_polys)
# create children
- children = self._slice(child, child_polytopes, datacube, datacube_transformations)
+ if idxs:
+ current_metadata_idx_stack = metadata_idx_stack + [idxs]
+ children = self._slice(child, child_polytopes, datacube, datacube_transformations,
+ current_metadata_idx_stack)
# If this node used to have children but now has none due to filtering, skip it.
if child.children and not children:
return None
new_found_vals = self.find_new_vals(found_vals, ax)
- final_children_and_vals.append((children, new_found_vals))
+ if idxs:
+ request_child_val = (children, new_found_vals, current_metadata_idx_stack)
+ else:
+ request_child_val = (children, new_found_vals)
+ final_children_and_vals.append(request_child_val)
+
if len(final_children_and_vals) == 0:
return None
return final_children_and_vals
- def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube]:
+ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations, metadata_idx_stack=None) -> list[Qube]:
result = []
+ if metadata_idx_stack is None:
+ metadata_idx_stack = [[0]]
+
if len(q.children) == 0:
# add "fake" axes and their nodes in order -> what about merged axes??
mapper_transformation = None
- # for transformation in list(datacube_transformations.values()):
for transformation in datacube_transformations:
if isinstance(transformation, DatacubeMapper):
mapper_transformation = transformation
@@ -115,7 +150,7 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list
ax = datacube._axes[grid_axes[0]]
lower, upper, slice_axis_idx = poly.extents(grid_axes[0])
- found_vals = self.find_values_between(poly, ax, None, datacube, lower, upper)
+ found_vals, _ = self.find_values_between(poly, ax, None, datacube, lower, upper)
if len(found_vals) == 0:
continue
@@ -123,7 +158,6 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list
sliced_polys = self.get_sliced_polys(found_vals, ax, grid_axes[0], poly, slice_axis_idx)
# decide if axis should be compressed or not according to polytope
# NOTE: actually the first grid axis will never be compressed
- # axis_compressed = (grid_axes[0] in self.compressed_axes)
# if it's not compressed, need to separate into different nodes to append to the tree
for i, found_val in enumerate(found_vals):
@@ -136,7 +170,13 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list
flattened_path = {grid_axes[0]: (found_val,)}
# get second axis children through slicing
children = self._slice_second_grid_axis(
- grid_axes[1], child_polytopes, datacube, datacube_transformations, second_axis_vals, flattened_path)
+ grid_axes[1],
+ child_polytopes,
+ datacube,
+ datacube_transformations,
+ second_axis_vals,
+ flattened_path,
+ )
# If this node used to have children but now has none due to filtering, skip it.
if not children:
continue
@@ -148,10 +188,9 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list
# TODO: when we have an axis that we would like to merge with another, we should skip the node creation here
# and instead keep/cache the value to merge with the node from before??
- qube_node = Qube.make(key=grid_axes[0],
- values=QEnum([found_val]),
- metadata={},
- children=children)
+ qube_node = Qube.make_node(
+ key=grid_axes[0], values=QEnum([found_val]), metadata={}, children=children
+ )
result.append(qube_node)
for i, child in enumerate(q.children):
@@ -160,40 +199,66 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations) -> list
# TODO: here add the axes to datacube backend with transformations for child.key
# TODO: update the datacube axis_options before we dynamically change the axes
+
+ # TODO: this is slow... will need to make it faster and only do this when we need...
datacube.add_axes_dynamically(child)
# here now first change the values in the polytopes on the axis to reflect the axis type
-
for poly in polytopes_on_axis:
ax = datacube._axes[child.key]
# find extents of polytope on child.key
lower, upper, slice_axis_idx = poly.extents(child.key)
# find values on child that are within extents
- found_vals = self.find_values_between(poly, ax, child, datacube, lower, upper)
+ found_vals, idxs = self.find_values_between(poly, ax, child, datacube, lower, upper)
+
+ # TODO: find the indexes of the found_vals wrt child.values, to extract the right metadata that we want to keep inside self.build_branch
if len(found_vals) == 0:
continue
sliced_polys = self.get_sliced_polys(found_vals, ax, child.key, poly, slice_axis_idx)
# decide if axis should be compressed or not according to polytope
- axis_compressed = (child.key in self.compressed_axes)
+ axis_compressed = child.key in self.compressed_axes
real_uncompressed_axis = not axis_compressed and len(found_vals) > 1
final_children_and_vals = self.build_branch(
- real_uncompressed_axis, found_vals, sliced_polys, polytopes, poly, child, datacube, datacube_transformations, ax)
+ real_uncompressed_axis,
+ found_vals,
+ sliced_polys,
+ polytopes,
+ poly,
+ child,
+ datacube,
+ datacube_transformations,
+ ax,
+ idxs,
+ metadata_idx_stack
+ )
if final_children_and_vals is None:
continue
- result.extend([Qube.make(
- key=child.key,
- values=QEnum(new_found_vals),
- metadata=child.metadata,
- children=children
- ) for (children, new_found_vals) in final_children_and_vals])
+
+ def find_metadata(metadata_idx):
+ metadata = {}
+ for k, vs in child.metadata.items():
+ metadata_depth = len(vs.shape)
+ relevant_metadata_idxs = metadata_idx[:metadata_depth]
+ ix = np.ix_(*relevant_metadata_idxs)
+ metadata[k] = vs[ix]
+ return metadata
+
+ for children, new_found_vals, current_metadata_idxs in final_children_and_vals:
+ metadata = find_metadata(current_metadata_idxs)
+ qube_node = Qube.make_node(
+ key=child.key, values=QEnum(new_found_vals), metadata=metadata, children=children
+ )
+ result.append(qube_node)
return result
- def _slice_second_grid_axis(self, axis_name, polytopes, datacube, datacube_transformations, second_axis_vals, path) -> list[Qube]:
+ def _slice_second_grid_axis(
+ self, axis_name, polytopes, datacube, datacube_transformations, second_axis_vals, path
+ ) -> list[Qube]:
result = []
polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes)
@@ -201,7 +266,7 @@ def _slice_second_grid_axis(self, axis_name, polytopes, datacube, datacube_trans
ax = datacube._axes[axis_name]
lower, upper, slice_axis_idx = poly.extents(axis_name)
- found_vals = self.find_values_between(poly, ax, None, datacube, lower, upper, path)
+ found_vals, _ = self.find_values_between(poly, ax, None, datacube, lower, upper, path)
if len(found_vals) == 0:
continue
@@ -221,18 +286,14 @@ def _slice_second_grid_axis(self, axis_name, polytopes, datacube, datacube_trans
# NOTE this was the last axis so we do not have children...
- result.extend([Qube.make(
- key=axis_name,
- values=QEnum(new_found_vals),
- metadata={},
- children={}
- )])
+ result.extend([Qube.make_node(key=axis_name, values=QEnum(
+ new_found_vals), metadata={"result": []}, children={})])
return result
def slice_tree(self, datacube, final_polys):
q = datacube.q
datacube_transformations = datacube.datacube_transformations
- return Qube.root_node(self._slice(q, final_polys, datacube, datacube_transformations))
+ return Qube.make_root(self._slice(q, final_polys, datacube, datacube_transformations))
def build_tree(self, polytopes_to_slice, datacube):
groups, input_axes = group(polytopes_to_slice)
@@ -250,5 +311,5 @@ def build_tree(self, polytopes_to_slice, datacube):
final_tree = sub_trees[0]
for sub_tree in sub_trees[1:]:
- union(final_tree, sub_tree)
+ final_tree | sub_tree
return final_tree
diff --git a/polytope_feature/options.py b/polytope_feature/options.py
index 584dc9b04..020c7007e 100644
--- a/polytope_feature/options.py
+++ b/polytope_feature/options.py
@@ -3,6 +3,7 @@
from conflator import ConfigModel
from pydantic import ConfigDict
+
from .datacube.datacube_axis import DatacubeAxis
diff --git a/polytope_feature/polytope.py b/polytope_feature/polytope.py
index 0f806c8c7..ad2cc561a 100644
--- a/polytope_feature/polytope.py
+++ b/polytope_feature/polytope.py
@@ -46,8 +46,13 @@ def __init__(self, datacube, engine=None, options=None, context=None):
if options is None:
options = {}
- axis_options, compressed_axes_options, config, alternative_axes, datacube_axes = PolytopeOptions.get_polytope_options(
- options)
+ (
+ axis_options,
+ compressed_axes_options,
+ config,
+ alternative_axes,
+ datacube_axes,
+ ) = PolytopeOptions.get_polytope_options(options)
self.context = context
self.datacube = Datacube.create(
diff --git a/polytope_feature/version.py b/polytope_feature/version.py
index d0ab53db9..a82b376d2 100644
--- a/polytope_feature/version.py
+++ b/polytope_feature/version.py
@@ -1 +1 @@
-__version__ = "1.0.39"
+__version__ = "1.1.1"
diff --git a/readme.md b/readme.md
index 05eed3c21..2aaaf1fdf 100644
--- a/readme.md
+++ b/readme.md
@@ -13,6 +13,9 @@
+
+
+
Concept •
@@ -30,8 +33,11 @@ Polytope is designed to extend different datacube backends:
Polytope supports datacubes which have branching, non-uniform indexing, and even cyclic axes. If the datacube backend supports byte-addressability and efficient random access (either in-memory or direct from storage), **Polytope** can be used to dramatically decrease overall I/O load.
-> [!WARNING]
-> This project is BETA and will be experimental for the foreseeable future. Interfaces and functionality are likely to change, and the project itself may be scrapped. DO NOT use this software in any project/software that is operational.
+> \[!IMPORTANT\]
+> This software is **Incubating** and subject to ECMWF's guidelines on [Software Maturity](https://github.com/ecmwf/codex/raw/refs/heads/main/Project%20Maturity).
+
+
## Concept
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 000000000..1ea51bb91
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,101 @@
+import os
+import pathlib
+import shutil
+
+import pyfdb
+import pytest
+import requests
+import yaml
+
+
+class HTTPError(Exception):
+ def __init__(self, status_code, message):
+ self.status_code = status_code
+ self.message = message
+ super().__init__(f"HTTPError {status_code}: {message}")
+
+
+@pytest.fixture(scope="session")
+def shared_temp_data_dir(tmp_path_factory):
+ """
+ Provides a unique temp directory for the entire test session.
+ """
+ temp_dir = tmp_path_factory.mktemp("shared_fdb_data")
+ return temp_dir
+
+
+@pytest.fixture(scope="session")
+def downloaded_data_test_files(shared_temp_data_dir):
+ """
+ Downloads all required GRIB test files once per session.
+ """
+ files_to_download = [
+ ("https://sites.ecmwf.int/repository/polytope/test-data/foo.grib", "foo.grib"),
+ ("https://sites.ecmwf.int/repository/polytope/test-data/local.grib", "local.grib"),
+ ("https://sites.ecmwf.int/repository/polytope/test-data/aifs_data_from_fdb.grib", "aifs_data_from_fdb.grib"),
+ ("https://sites.ecmwf.int/repository/polytope/test-data/wind_gust_and_t2m.grib", "wind_gust_and_t2m.grib"),
+ ("https://sites.ecmwf.int/repository/polytope/test-data/t2m_jan_3_v2.grib", "t2m_jan_3_v2.grib"),
+ ("https://sites.ecmwf.int/repository/polytope/test-data/wave_spectra.grib", "wave_spectra.grib"),
+ ("https://sites.ecmwf.int/repository/polytope/test-data/era5-levels-members.grib", "era5-levels-members.grib"),
+ ]
+
+ downloaded_paths = []
+
+ for url, filename in files_to_download:
+ path = shared_temp_data_dir / filename
+ if not path.exists():
+ response = requests.get(url)
+ if response.status_code != 200:
+ raise HTTPError(response.status_code, f"Failed to download {url}")
+ with open(path, "wb") as f:
+ f.write(response.content)
+ downloaded_paths.append(path)
+
+ return downloaded_paths
+
+
+@pytest.fixture(scope="session")
+def fdb_path(request) -> pathlib.Path:
+ """
+ Provides path to test data at '