From 27d6de83ac0ceee43bb268d188e0d79223571699 Mon Sep 17 00:00:00 2001 From: mathleur Date: Mon, 24 Feb 2025 11:53:43 +0100 Subject: [PATCH 01/28] start to incorporate qubed latest --- .gitignore | 3 +- .../engine/qubed_polytope_intersection.py | 63 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 polytope_feature/engine/qubed_polytope_intersection.py diff --git a/.gitignore b/.gitignore index 4634ed76f..b880c0b3f 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,5 @@ new_updated_numpy_venv newest-polytope-venv serializedTree new_polytope_venv -*.json \ No newline at end of file +*.json +venv_python3_11 \ No newline at end of file diff --git a/polytope_feature/engine/qubed_polytope_intersection.py b/polytope_feature/engine/qubed_polytope_intersection.py new file mode 100644 index 000000000..7338e46d9 --- /dev/null +++ b/polytope_feature/engine/qubed_polytope_intersection.py @@ -0,0 +1,63 @@ +from typing import List + +from ..datacube.tensor_index_tree import TensorIndexTree +from ..shapes import ConvexPolytope +from ..utility.combinatorics import group, tensor_product +from .engine import Engine +from ..utility.list_tools import unique +from qubed import Qube + + +class QubedSlicing(Engine): + def __init__(self): + self.datacube = ?? + + def create_fake_datacube_mappers(self): + # TODO + self.datacube_mappers = ?? 
+ pass + + def create_request_polys(self, polytopes): + for p in polytopes: + self._unique_continuous_points(p) + + groups, input_axes = group(polytopes) + combinations = tensor_product(groups) + return combinations + + def _unique_continuous_points(self, p: ConvexPolytope): + for i, ax in enumerate(p._axes): + mapper = self.datacube_mappers.get(ax, None) + for j, val in enumerate(p.points): + p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) + # Remove duplicate points + unique(p.points) + + def extract(self, datacube, polytopes: List[ConvexPolytope]): + combinations = self.create_request_polys(polytopes) + + # TODO: replace all the TensorIndexTrees with Qube trees + + # request = TensorIndexTree() + + # for c in combinations: + # r = TensorIndexTree() + # new_c = [] + # for combi in c: + # if isinstance(combi, list): + # new_c.extend(combi) + # else: + # new_c.append(combi) + # r["unsliced_polytopes"] = set(new_c) + # current_nodes = [r] + # for ax in datacube.axes.values(): + # next_nodes = [] + # interm_next_nodes = [] + # for node in current_nodes: + # self._build_branch(ax, node, datacube, interm_next_nodes) + # next_nodes.extend(interm_next_nodes) + # interm_next_nodes = [] + # current_nodes = next_nodes + + # request.merge(r) + # return request From a13861a26a0178651e4ccc1d5b587d1da4c1dc63 Mon Sep 17 00:00:00 2001 From: mathleur Date: Mon, 24 Feb 2025 11:54:36 +0100 Subject: [PATCH 02/28] start to incorporate qubed latest --- polytope_feature/engine/qubed_polytope_intersection.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/polytope_feature/engine/qubed_polytope_intersection.py b/polytope_feature/engine/qubed_polytope_intersection.py index 7338e46d9..82fc6b17a 100644 --- a/polytope_feature/engine/qubed_polytope_intersection.py +++ b/polytope_feature/engine/qubed_polytope_intersection.py @@ -36,6 +36,11 @@ def _unique_continuous_points(self, p: ConvexPolytope): def extract(self, datacube, polytopes: List[ConvexPolytope]): combinations 
= self.create_request_polys(polytopes) + request = Qube.empty() + + for c in combinations: + r = Qube.empty() + # TODO: replace all the TensorIndexTrees with Qube trees # request = TensorIndexTree() From c415f7b32608b585aa01ec3fe88277565b8c1e8f Mon Sep 17 00:00:00 2001 From: mathleur Date: Mon, 24 Feb 2025 21:32:09 +0100 Subject: [PATCH 03/28] add qubed tests --- .../datacube/backends/qubed_fdb.py | 403 ++++++++++++++++++ .../datacube/backends/test_qubed_slicing.py | 147 +++++++ .../engine/qubed_polytope_intersection.py | 115 ++--- tests/test_ecmwf_oper_data_fdb.py | 7 +- 4 files changed, 615 insertions(+), 57 deletions(-) create mode 100644 polytope_feature/datacube/backends/qubed_fdb.py create mode 100644 polytope_feature/datacube/backends/test_qubed_slicing.py diff --git a/polytope_feature/datacube/backends/qubed_fdb.py b/polytope_feature/datacube/backends/qubed_fdb.py new file mode 100644 index 000000000..dcd8b31fa --- /dev/null +++ b/polytope_feature/datacube/backends/qubed_fdb.py @@ -0,0 +1,403 @@ +import logging +import operator +from copy import deepcopy +from itertools import product + +from ...utility.exceptions import BadGridError, BadRequestError +from ...utility.geometry import nearest_pt +from .datacube import Datacube, TensorIndexTree +from qubed import Qube +import requests + + +class FDBDatacube(Datacube): + def __init__( + self, gj, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None + ): + if config is None: + config = {} + if context is None: + context = {} + + super().__init__(axis_options, compressed_axes_options) + + logging.info("Created an FDB datacube with options: " + str(axis_options)) + + self.unwanted_path = {} + self.axis_options = axis_options + + # partial_request = config + # Find values in the level 3 FDB datacube + + self.gj = gj + self.fdb_tree = Qube.from_json(requests.get( + "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) + + if 
len(alternative_axes) == 0: + logging.info("Find GribJump axes for %s", context) + # TODO: change to get the axes from + # self.fdb_coordinates = self.gj.axes(partial_request, ctx=context) + self.fdb_coordinates = self.fdb_tree.axes() + logging.info("Retrieved available GribJump axes for %s", context) + if len(self.fdb_coordinates) == 0: + raise BadRequestError({}) + else: + self.fdb_coordinates = {} + for axis_config in alternative_axes: + self.fdb_coordinates[axis_config.axis_name] = axis_config.values + + fdb_coordinates_copy = deepcopy(self.fdb_coordinates) + for axis, vals in fdb_coordinates_copy.items(): + if len(vals) == 1: + if vals[0] == "": + self.fdb_coordinates.pop(axis) + + logging.info("Axes returned from GribJump are: " + str(self.fdb_coordinates)) + + self.fdb_coordinates["values"] = [] + for name, values in self.fdb_coordinates.items(): + values.sort() + options = None + for opt in self.axis_options: + if opt.axis_name == name: + options = opt + + self._check_and_add_axes(options, name, values) + self.treated_axes.append(name) + self.complete_axes.append(name) + + # add other options to axis which were just created above like "lat" for the mapper transformations for eg + for name in self._axes: + if name not in self.treated_axes: + options = None + for opt in self.axis_options: + if opt.axis_name == name: + options = opt + + val = self._axes[name].type + self._check_and_add_axes(options, name, val) + + logging.info("Polytope created axes for %s", self._axes.keys()) + + # def check_branching_axes(self, request): + # polytopes = request.polytopes() + # for polytope in polytopes: + # for ax in polytope._axes: + # if ax == "levtype": + # (upper, lower, idx) = polytope.extents(ax) + # if "sfc" in polytope.points[idx]: + # self.fdb_coordinates.pop("levelist", None) + + # if ax == "param": + # (upper, lower, idx) = polytope.extents(ax) + # if "140251" not in polytope.points[idx]: + # self.fdb_coordinates.pop("direction", None) + # 
self.fdb_coordinates.pop("frequency", None) + # else: + # # special param with direction and frequency + # if len(polytope.points[idx]) > 1: + # raise ValueError( + # "Param 251 is part of a special branching of the datacube. Please request it separately." # noqa: E501 + # ) + # self.fdb_coordinates.pop("quantile", None) + # self.fdb_coordinates.pop("year", None) + # self.fdb_coordinates.pop("month", None) + + # # NOTE: verify that we also remove the axis object for axes we've removed here + # axes_to_remove = set(self.complete_axes) - set(self.fdb_coordinates.keys()) + + # # Remove the keys from self._axes + # for axis_name in axes_to_remove: + # self._axes.pop(axis_name, None) + + def get(self, requests: TensorIndexTree, context=None): + if context is None: + context = {} + if len(requests.children) == 0: + return requests + fdb_requests = [] + fdb_requests_decoding_info = [] + self.get_fdb_requests(requests, fdb_requests, fdb_requests_decoding_info) + + # here, loop through the fdb requests and request from gj and directly add to the nodes + complete_list_complete_uncompressed_requests = [] + complete_fdb_decoding_info = [] + for j, compressed_request in enumerate(fdb_requests): + uncompressed_request = {} + + # Need to determine the possible decompressed requests + + # find the possible combinations of compressed indices + interm_branch_tuple_values = [] + for key in compressed_request[0].keys(): + interm_branch_tuple_values.append(compressed_request[0][key]) + request_combis = product(*interm_branch_tuple_values) + + # Need to extract the possible requests and add them to the right nodes + for combi in request_combis: + uncompressed_request = {} + for i, key in enumerate(compressed_request[0].keys()): + uncompressed_request[key] = combi[i] + complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash) + complete_list_complete_uncompressed_requests.append(complete_uncompressed_request) + 
complete_fdb_decoding_info.append(fdb_requests_decoding_info[j]) + + if logging.root.level <= logging.DEBUG: + printed_list_to_gj = complete_list_complete_uncompressed_requests[::1000] + logging.debug("The requests we give GribJump are: %s", printed_list_to_gj) + logging.info("Requests given to GribJump extract for %s", context) + try: + output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) + except Exception as e: + if "BadValue: Grid hash mismatch" in str(e): + logging.info("Error is: %s", e) + raise BadGridError() + else: + raise e + + logging.info("Requests extracted from GribJump for %s", context) + if logging.root.level <= logging.DEBUG: + printed_output_values = output_values[::1000] + logging.debug("GribJump outputs: %s", printed_output_values) + self.assign_fdb_output_to_nodes(output_values, complete_fdb_decoding_info) + + def get_fdb_requests( + self, + requests: TensorIndexTree, + fdb_requests=[], + fdb_requests_decoding_info=[], + leaf_path=None, + ): + if leaf_path is None: + leaf_path = {} + + # First when request node is root, go to its children + if requests.axis.name == "root": + logging.debug("Looking for data for the tree") + + for c in requests.children: + self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info) + # If request node has no children, we have a leaf so need to assign fdb values to it + else: + key_value_path = {requests.axis.name: requests.values} + ax = requests.axis + (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( + key_value_path, leaf_path, self.unwanted_path + ) + leaf_path.update(key_value_path) + if len(requests.children[0].children[0].children) == 0: + # find the fdb_requests and associated nodes to which to add results + (path, current_start_idxs, fdb_node_ranges, lat_length) = self.get_2nd_last_values(requests, leaf_path) + ( + original_indices, + sorted_request_ranges, + fdb_node_ranges, + ) = self.sort_fdb_request_ranges(current_start_idxs, lat_length, 
fdb_node_ranges) + fdb_requests.append((path, sorted_request_ranges)) + fdb_requests_decoding_info.append((original_indices, fdb_node_ranges)) + + # Otherwise remap the path for this key and iterate again over children + else: + for c in requests.children: + self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path) + + def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs): + seen_indices = set() + for i, idxs_list in enumerate(current_start_idxs): + for k, sub_lat_idxs in enumerate(idxs_list): + actual_fdb_node = fdb_node_ranges[i][k] + original_fdb_node_range_vals = [] + new_current_start_idx = [] + for j, idx in enumerate(sub_lat_idxs): + if idx not in seen_indices: + # NOTE: need to remove it from the values in the corresponding tree node + # NOTE: need to read just the range we give to gj + original_fdb_node_range_vals.append(actual_fdb_node[0].values[j]) + seen_indices.add(idx) + new_current_start_idx.append(idx) + if original_fdb_node_range_vals != []: + actual_fdb_node[0].values = tuple(original_fdb_node_range_vals) + else: + # there are no values on this node anymore so can remove it + actual_fdb_node[0].remove_branch() + if len(new_current_start_idx) == 0: + current_start_idxs[i].pop(k) + else: + current_start_idxs[i][k] = new_current_start_idx + return (fdb_node_ranges, current_start_idxs) + + def nearest_lat_lon_search(self, requests): + if len(self.nearest_search) != 0: + first_ax_name = requests.children[0].axis.name + second_ax_name = requests.children[0].children[0].axis.name + + if first_ax_name not in self.nearest_search.keys() or second_ax_name not in self.nearest_search.keys(): + raise Exception("nearest point search axes are wrong") + + second_ax = requests.children[0].children[0].axis + + nearest_pts = [ + [lat_val, second_ax._remap_val_to_axis_range(lon_val)] + for (lat_val, lon_val) in zip( + self.nearest_search[first_ax_name][0], self.nearest_search[second_ax_name][0] + ) + ] + + 
found_latlon_pts = [] + for lat_child in requests.children: + for lon_child in lat_child.children: + found_latlon_pts.append([lat_child.values, lon_child.values]) + + # now find the nearest lat lon to the points requested + nearest_latlons = [] + for pt in nearest_pts: + nearest_latlon = nearest_pt(found_latlon_pts, pt) + nearest_latlons.append(nearest_latlon) + + # need to remove the branches that do not fit + lat_children_values = [child.values for child in requests.children] + for i in range(len(lat_children_values)): + lat_child_val = lat_children_values[i] + lat_child = [child for child in requests.children if child.values == lat_child_val][0] + if lat_child.values not in [(latlon[0],) for latlon in nearest_latlons]: + lat_child.remove_branch() + else: + possible_lons = [latlon[1] for latlon in nearest_latlons if (latlon[0],) == lat_child.values] + lon_children_values = [child.values for child in lat_child.children] + for j in range(len(lon_children_values)): + lon_child_val = lon_children_values[j] + lon_child = [child for child in lat_child.children if child.values == lon_child_val][0] + for value in lon_child.values: + if value not in possible_lons: + lon_child.remove_compressed_branch(value) + + def get_2nd_last_values(self, requests, leaf_path=None): + if leaf_path is None: + leaf_path = {} + # In this function, we recursively loop over the last two layers of the tree and store the indices of the + # request ranges in those layers + self.nearest_lat_lon_search(requests) + + lat_length = len(requests.children) + current_start_idxs = [False] * lat_length + fdb_node_ranges = [False] * lat_length + for i in range(len(requests.children)): + lat_child = requests.children[i] + lon_length = len(lat_child.children) + current_start_idxs[i] = [None] * lon_length + fdb_node_ranges[i] = [[TensorIndexTree.root for y in range(lon_length)] for x in range(lon_length)] + current_start_idx = deepcopy(current_start_idxs[i]) + fdb_range_nodes = deepcopy(fdb_node_ranges[i]) + 
key_value_path = {lat_child.axis.name: lat_child.values} + ax = lat_child.axis + (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( + key_value_path, leaf_path, self.unwanted_path + ) + leaf_path.update(key_value_path) + (current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf( + lat_child, leaf_path, current_start_idx, fdb_range_nodes + ) + + leaf_path_copy = deepcopy(leaf_path) + leaf_path_copy.pop("values", None) + return (leaf_path_copy, current_start_idxs, fdb_node_ranges, lat_length) + + def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range_n): + current_idx = [[] for i in range(len(requests.children))] + fdb_range_n = [[] for i in range(len(requests.children))] + for i, c in enumerate(requests.children): + # now c are the leaves of the initial tree + key_value_path = {c.axis.name: c.values} + ax = c.axis + (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( + key_value_path, leaf_path, self.unwanted_path + ) + # TODO: change this to accommodate non consecutive indexes being compressed too + current_idx[i].extend(key_value_path["values"]) + fdb_range_n[i].append(c) + return (current_idx, fdb_range_n) + + def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): + for k in range(len(output_values)): + request_output_values = output_values[k] + ( + original_indices, + fdb_node_ranges, + ) = fdb_requests_decoding_info[k] + sorted_fdb_range_nodes = [fdb_node_ranges[i] for i in original_indices] + for i in range(len(sorted_fdb_range_nodes)): + n = sorted_fdb_range_nodes[i][0] + if len(request_output_values[0]) == 0: + # If we are here, no data was found for this path in the fdb + none_array = [None] * len(n.values) + n.result.extend(none_array) + else: + interm_request_output_values = request_output_values[0][i][0] + n.result.extend(interm_request_output_values) + + def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): + 
(new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( + fdb_node_ranges, current_start_idx + ) + interm_request_ranges = [] + # TODO: modify the start indexes to have as many arrays as the request ranges + new_fdb_node_ranges = [] + for i in range(lat_length): + interm_fdb_nodes = fdb_node_ranges[i] + old_interm_start_idx = current_start_idx[i] + for j in range(len(old_interm_start_idx)): + # TODO: if we sorted the cyclic values in increasing order on the tree too, + # then we wouldn't have to sort here? + sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1]) + original_indices_idx, interm_start_idx = zip(*sorted_list) + for interm_fdb_nodes_obj in interm_fdb_nodes[j]: + interm_fdb_nodes_obj.values = tuple([interm_fdb_nodes_obj.values[k] for k in original_indices_idx]) + if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx): + current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1) + interm_request_ranges.append(current_request_ranges) + new_fdb_node_ranges.append(interm_fdb_nodes[j]) + else: + jumps = list(map(operator.sub, interm_start_idx[1:], interm_start_idx[:-1])) + last_idx = 0 + for k, jump in enumerate(jumps): + if jump > 1: + current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[k] + 1) + new_fdb_node_ranges.append(interm_fdb_nodes[j]) + last_idx = k + 1 + interm_request_ranges.append(current_request_ranges) + if k == len(interm_start_idx) - 2: + current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[-1] + 1) + interm_request_ranges.append(current_request_ranges) + new_fdb_node_ranges.append(interm_fdb_nodes[j]) + request_ranges_with_idx = list(enumerate(interm_request_ranges)) + sorted_list = sorted(request_ranges_with_idx, key=lambda x: x[1][0]) + original_indices, sorted_request_ranges = zip(*sorted_list) + return (original_indices, sorted_request_ranges, new_fdb_node_ranges) + + def datacube_natural_indexes(self, axis, 
subarray): + indexes = subarray.get(axis.name, None) + return indexes + + def select(self, path, unmapped_path): + return self.fdb_coordinates + + def ax_vals(self, name): + return self.fdb_coordinates.get(name, None) + + def prep_tree_encoding(self, node, unwanted_path=None): + # TODO: prepare the tree for protobuf encoding + # ie transform all axes for gribjump and adding the index property on the leaves + if unwanted_path is None: + unwanted_path = {} + + ax = node.axis + (new_node, unwanted_path) = ax.unmap_tree_node(node, unwanted_path) + + if len(node.children) != 0: + for c in new_node.children: + self.prep_tree_encoding(c, unwanted_path) + + def prep_tree_decoding(self, tree): + # TODO: transform the tree after decoding from protobuf + # ie unstransform all axes from gribjump and put the indexes back as a leaf/extra node + pass diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py new file mode 100644 index 000000000..ea4906f98 --- /dev/null +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -0,0 +1,147 @@ +from qubed import Qube +from qubed.value_types import QEnum +from typing import Iterator +# from ...shapes import ConvexPolytope + +q = Qube.from_dict({ + "class=od": { + "expver=0001": {"param=1/2/3/4/5": {}}, + "expver=0002": {"param=1": {}, "param=2": {}}, + }, + "class=rd": { + "expver=0001": { + "param=1/2/3": {}, + "expver=0001": {"param=1/2/3": {}, } + }, + "expver=0002": {"param=1/2/3/4": {}}, + }, +}).compress() + +# polytopes_list = [ConvexPolytope(), ] + + +# def slice_poly(q: Qube, polytope): +# def _slice_poly(q: Qube, poly): +# for child in q.children: +# # For each child, find the polytopes we should slice on that axis +# right_unsliced_polytopes = [] +# for polytope in q.metadata["unsliced_polytopes"]: +# if q.key in polytope._axes: +# right_unsliced_polytopes.append(polytope) + +# for i, polytope in enumerate(right_unsliced_polytopes): +# lower, 
upper, slice_axis_idx = polytope.extents(q.key) + + +def slice(q: Qube, request: dict) -> 'Qube': + def _slice(q: Qube, r: dict) -> Iterator[Qube]: + for child in q.children: + requested_values = r.get(child.key, []) + found_values = [v for v in requested_values if v in child.values] + if not found_values: + continue + truncated_request = {k: v for k, v in r.items() if k != child.key} + children = list(_slice(child, truncated_request)) + + # If this node used to have children, i.e was not a leaf node, + # but as a result of filtering now has no children + # then filter it out. + if child.children and not children: + continue + + yield Qube.make( + key=child.key, + values=QEnum(found_values), + metadata=child.metadata, + children=children, + ) + + return Qube.root_node(list(_slice(q, request))) + + +request = { + "expver": ["0001"], + "class": ["rd", "od"], + "param": ["1", "2", "3"], +} + + +print(q) +# q = slice(q, request) + +# print(q) + +# new_q = Qube.from_dict({ +# "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}}, +# "expver=0001": {"param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, +# }).compress() + +new_q = Qube.from_dict({ + "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}, "param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, + # "expver=0001": {"param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, +}).compress() + +print("HERE") +print(new_q) + +print(new_q["expver", "0001"].children) + + +def modified_slice(q: Qube, request: dict) -> 'Qube': + def _slice(q: Qube, r: dict) -> Iterator[Qube]: + for child in q.children: + requested_values = r.get(child.key, []) + found_values = [v for v in requested_values if v in child.values] + if not found_values: + continue + print("HERE") + print(r.items()) + for k, v in r.items(): + if k == "param": + if "1" in v: + truncated_request = {"level": "2"} + children = list(_slice(child, truncated_request)) + if "2" in v: + truncated_request = {"level": ["1", "2", "3"]} + children = 
list(_slice(child, truncated_request)) + if "3" in v: + truncated_request = {"level": "2"} + children = list(_slice(child, truncated_request)) + else: + truncated_request = {k: v for k, v in r.items() if k != child.key} + children = list(_slice(child, truncated_request)) + + # If this node used to have children, i.e was not a leaf node, + # but as a result of filtering now has no children + # then filter it out. + if child.children and not children: + continue + + # for child in children: + # yield Qube.make( + # key=child.key, + # values=QEnum(found_values), + # metadata=child.metadata, + # children=list(child), + # ) + print("WHAT NODES DID WE CREATE?") + print(Qube.make( + key=child.key, + values=QEnum(found_values), + metadata=child.metadata, + children=children, + )) + yield Qube.make( + key=child.key, + values=QEnum(found_values), + metadata=child.metadata, + children=children, + ) + + return Qube.root_node(list(_slice(q, request))) + + +print(modified_slice(new_q, request={ + "expver": ["0001"], + "param": ["1", "2", "3"], +})) diff --git a/polytope_feature/engine/qubed_polytope_intersection.py b/polytope_feature/engine/qubed_polytope_intersection.py index 82fc6b17a..3585b3d63 100644 --- a/polytope_feature/engine/qubed_polytope_intersection.py +++ b/polytope_feature/engine/qubed_polytope_intersection.py @@ -1,68 +1,75 @@ -from typing import List +# from typing import List -from ..datacube.tensor_index_tree import TensorIndexTree -from ..shapes import ConvexPolytope -from ..utility.combinatorics import group, tensor_product -from .engine import Engine -from ..utility.list_tools import unique -from qubed import Qube +# from ..datacube.tensor_index_tree import TensorIndexTree +# from ..shapes import ConvexPolytope +# from ..utility.combinatorics import group, tensor_product +# from .engine import Engine +# from ..utility.list_tools import unique +# from qubed import Qube -class QubedSlicing(Engine): - def __init__(self): - self.datacube = ?? 
+# class QubedSlicing(Engine): +# def __init__(self): +# self.datacube = ?? - def create_fake_datacube_mappers(self): - # TODO - self.datacube_mappers = ?? - pass +# def create_fake_datacube_mappers(self): +# # TODO +# self.datacube_mappers = ?? +# pass - def create_request_polys(self, polytopes): - for p in polytopes: - self._unique_continuous_points(p) +# def create_request_polys(self, polytopes): +# for p in polytopes: +# self._unique_continuous_points(p) - groups, input_axes = group(polytopes) - combinations = tensor_product(groups) - return combinations +# groups, input_axes = group(polytopes) +# combinations = tensor_product(groups) +# return combinations - def _unique_continuous_points(self, p: ConvexPolytope): - for i, ax in enumerate(p._axes): - mapper = self.datacube_mappers.get(ax, None) - for j, val in enumerate(p.points): - p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) - # Remove duplicate points - unique(p.points) +# def _unique_continuous_points(self, p: ConvexPolytope): +# for i, ax in enumerate(p._axes): +# mapper = self.datacube_mappers.get(ax, None) +# for j, val in enumerate(p.points): +# p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) +# # Remove duplicate points +# unique(p.points) - def extract(self, datacube, polytopes: List[ConvexPolytope]): - combinations = self.create_request_polys(polytopes) +# def extract(self, datacube, polytopes: List[ConvexPolytope]): +# combinations = self.create_request_polys(polytopes) - request = Qube.empty() +# request = Qube.empty() - for c in combinations: - r = Qube.empty() +# for c in combinations: +# r = Qube.empty() +# new_c = [] +# for combi in c: +# if isinstance(combi, list): +# new_c.extend(combi) +# else: +# new_c.append(combi) +# r.set - # TODO: replace all the TensorIndexTrees with Qube trees +# # TODO: replace all the TensorIndexTrees with Qube trees - # request = TensorIndexTree() +# # request = TensorIndexTree() - # for c in combinations: - # r = TensorIndexTree() 
- # new_c = [] - # for combi in c: - # if isinstance(combi, list): - # new_c.extend(combi) - # else: - # new_c.append(combi) - # r["unsliced_polytopes"] = set(new_c) - # current_nodes = [r] - # for ax in datacube.axes.values(): - # next_nodes = [] - # interm_next_nodes = [] - # for node in current_nodes: - # self._build_branch(ax, node, datacube, interm_next_nodes) - # next_nodes.extend(interm_next_nodes) - # interm_next_nodes = [] - # current_nodes = next_nodes +# # for c in combinations: +# # r = TensorIndexTree() +# # new_c = [] +# # for combi in c: +# # if isinstance(combi, list): +# # new_c.extend(combi) +# # else: +# # new_c.append(combi) +# # r["unsliced_polytopes"] = set(new_c) +# # current_nodes = [r] +# # for ax in datacube.axes.values(): +# # next_nodes = [] +# # interm_next_nodes = [] +# # for node in current_nodes: +# # self._build_branch(ax, node, datacube, interm_next_nodes) +# # next_nodes.extend(interm_next_nodes) +# # interm_next_nodes = [] +# # current_nodes = next_nodes - # request.merge(r) - # return request +# # request.merge(r) +# # return request diff --git a/tests/test_ecmwf_oper_data_fdb.py b/tests/test_ecmwf_oper_data_fdb.py index e848716c8..2efdfe4a8 100644 --- a/tests/test_ecmwf_oper_data_fdb.py +++ b/tests/test_ecmwf_oper_data_fdb.py @@ -56,7 +56,8 @@ def test_fdb_datacube(self): Select("class", ["od"]), Select("stream", ["oper"]), Select("type", ["fc"]), - Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), + # Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), + Box(["latitude", "longitude"], [0, 0], [80, 80]), ) self.fdbdatacube = gj.GribJump() self.slicer = HullSlicer() @@ -67,8 +68,8 @@ def test_fdb_datacube(self): ) result = self.API.retrieve(request) result.pprint() - assert len(result.leaves) == 3 - assert len(result.leaves[0].result) == 3 + # assert len(result.leaves) == 3 + # assert len(result.leaves[0].result) == 3 @pytest.mark.fdb def test_fdb_datacube_point(self): From a411cc8bcbda64bf3a35a8b0cba87d27eea89c89 Mon Sep 17 
00:00:00 2001 From: mathleur Date: Tue, 25 Feb 2025 17:20:29 +0100 Subject: [PATCH 04/28] qubed tree slicing --- .../backends/test_qubed_extraction.py | 24 ++ .../datacube/backends/test_qubed_slicing.py | 234 ++++++++++++------ .../engine/qubed_polytope_intersection.py | 162 ++++++------ tests/test_qubed_extraction.py | 65 +++++ 4 files changed, 339 insertions(+), 146 deletions(-) create mode 100644 polytope_feature/datacube/backends/test_qubed_extraction.py create mode 100644 tests/test_qubed_extraction.py diff --git a/polytope_feature/datacube/backends/test_qubed_extraction.py b/polytope_feature/datacube/backends/test_qubed_extraction.py new file mode 100644 index 000000000..5d3204db5 --- /dev/null +++ b/polytope_feature/datacube/backends/test_qubed_extraction.py @@ -0,0 +1,24 @@ +from qubed import Qube +import requests + +from ...shapes import ConvexPolytope + + +fdb_tree = Qube.from_json(requests.get( + "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) + +fdb_tree.print() +combi_polytopes = [ + # ConvexPolytope() +] + +# Select("step", [0]), +# Select("levtype", ["sfc"]), +# Select("date", [pd.Timestamp("20231102T000000")]), +# Select("domain", ["g"]), +# Select("expver", ["0001"]), +# Select("param", ["167"]), +# Select("class", ["od"]), +# Select("stream", ["oper"]), +# Select("type", ["fc"]), +# Box(["latitude", "longitude"], [0, 0], [80, 80]), diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index ea4906f98..9ddf833fd 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -33,30 +33,64 @@ # lower, upper, slice_axis_idx = polytope.extents(q.key) +# def slice(q: Qube, request: dict) -> 'Qube': +# def _slice(q: Qube, r: dict) -> Iterator[Qube]: +# for child in q.children: +# requested_values = r.get(child.key, []) +# found_values = [v for v in 
requested_values if v in child.values] +# if not found_values: +# continue +# truncated_request = {k: v for k, v in r.items() if k != child.key} +# children = list(_slice(child, truncated_request)) + +# # If this node used to have children, i.e was not a leaf node, +# # but as a result of filtering now has no children +# # then filter it out. +# if child.children and not children: +# continue + +# yield Qube.make( +# key=child.key, +# values=QEnum(found_values), +# metadata=child.metadata, +# children=children, +# ) + +# return Qube.root_node(list(_slice(q, request))) + def slice(q: Qube, request: dict) -> 'Qube': - def _slice(q: Qube, r: dict) -> Iterator[Qube]: + def _slice(q: Qube, r: dict) -> list[Qube]: + result = [] for child in q.children: requested_values = r.get(child.key, []) found_values = [v for v in requested_values if v in child.values] if not found_values: continue truncated_request = {k: v for k, v in r.items() if k != child.key} - children = list(_slice(child, truncated_request)) + children = _slice(child, truncated_request) - # If this node used to have children, i.e was not a leaf node, - # but as a result of filtering now has no children - # then filter it out. + # If this node used to have children but now has none due to filtering, skip it. 
if child.children and not children: continue - yield Qube.make( - key=child.key, - values=QEnum(found_values), - metadata=child.metadata, - children=children, - ) + if len(found_values) > 1: + result.extend([Qube.make( + key=child.key, + values=QEnum(val), + metadata=child.metadata, + children=children, + ) for val in found_values]) + else: + result.extend([Qube.make( + key=child.key, + values=QEnum(found_values), + metadata=child.metadata, + children=children + )]) + + return result - return Qube.root_node(list(_slice(q, request))) + return Qube.root_node(_slice(q, request)) request = { @@ -67,81 +101,139 @@ def _slice(q: Qube, r: dict) -> Iterator[Qube]: print(q) -# q = slice(q, request) +q = slice(q, request) -# print(q) +print(q) # new_q = Qube.from_dict({ # "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}}, # "expver=0001": {"param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, # }).compress() -new_q = Qube.from_dict({ - "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}, "param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, - # "expver=0001": {"param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, -}).compress() +# new_q = Qube.from_dict({ +# "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}, "param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, +# # "expver=0001": {"param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, +# }).compress() -print("HERE") -print(new_q) +# print("HERE") +# print(new_q) -print(new_q["expver", "0001"].children) +# print(new_q["expver", "0001"].children) -def modified_slice(q: Qube, request: dict) -> 'Qube': - def _slice(q: Qube, r: dict) -> Iterator[Qube]: +# def modified_slice(q: Qube, request: dict) -> 'Qube': +# def _slice(q: Qube, r: dict) -> Iterator[Qube]: +# for child in q.children: +# requested_values = r.get(child.key, []) +# found_values = [v for v in requested_values if v in child.values] +# if not found_values: +# continue +# print("HERE") +# print(r.items()) +# for k, v in 
r.items(): +# if k == "param": +# children = [] +# if "1" in found_values: +# truncated_request = {"level": "2"} +# children.extend(list(_slice(child, truncated_request))) +# if "2" in found_values: +# truncated_request = {"level": ["1", "2", "3"]} +# children.extend(list(_slice(child, truncated_request))) +# if "3" in found_values: +# truncated_request = {"level": "2"} +# children.extend(list(_slice(child, truncated_request))) +# else: +# truncated_request = {k: v for k, v in r.items() if k != child.key} +# children = list(_slice(child, truncated_request)) + +# # If this node used to have children, i.e was not a leaf node, +# # but as a result of filtering now has no children +# # then filter it out. +# if child.children and not children: +# continue + +# # for child in children: +# # yield Qube.make( +# # key=child.key, +# # values=QEnum(found_values), +# # metadata=child.metadata, +# # children=list(child), +# # ) +# print("WHAT NODES DID WE CREATE?") +# print(Qube.make( +# key=child.key, +# values=QEnum(found_values), +# metadata=child.metadata, +# children=children, +# )) +# yield Qube.make( +# key=child.key, +# values=QEnum(found_values), +# metadata=child.metadata, +# children=children, +# ) + +# return Qube.root_node(list(_slice(q, request))) + + +# print(modified_slice(new_q, request={ +# "expver": ["0001"], +# "param": ["1", "2", "3"], +# })) + + +def actual_slice(q: Qube, polytopes_to_slice) -> 'Qube': + + def find_polytopes_on_axis(q: Qube, polytopes): + polytopes_on_axis = [] + axis_name = q.key + for poly in polytopes: + if axis_name in poly._axes: + polytopes_on_axis.append(poly) + return polytopes_on_axis + + def _slice(q: Qube, polytopes) -> list[Qube]: + result = [] for child in q.children: + # TODO: find polytopes which are defined on axis child.key + polytopes_on_axis = find_polytopes_on_axis(child, polytopes) + # TODO: for each polytope: + for poly in polytopes_on_axis: + # TODO: find extents of polytope on child.key + lower, upper, 
slice_axis_idx = poly.extents(child.key) + # TODO: find values on child that are within extents + found_vals = [v for v in child.values if lower <= v <= upper] + # TODO: slice polytope along each value on child and keep resulting polytopes in memory + + # TODO: remove polytope from the polytope list and append the sliced polytopes + # TODO: with these new polytopes, recurse and create children etc... + pass requested_values = r.get(child.key, []) found_values = [v for v in requested_values if v in child.values] if not found_values: continue - print("HERE") - print(r.items()) - for k, v in r.items(): - if k == "param": - if "1" in v: - truncated_request = {"level": "2"} - children = list(_slice(child, truncated_request)) - if "2" in v: - truncated_request = {"level": ["1", "2", "3"]} - children = list(_slice(child, truncated_request)) - if "3" in v: - truncated_request = {"level": "2"} - children = list(_slice(child, truncated_request)) - else: - truncated_request = {k: v for k, v in r.items() if k != child.key} - children = list(_slice(child, truncated_request)) - - # If this node used to have children, i.e was not a leaf node, - # but as a result of filtering now has no children - # then filter it out. + truncated_request = {k: v for k, v in r.items() if k != child.key} + children = _slice(child, truncated_request) + + # If this node used to have children but now has none due to filtering, skip it. 
if child.children and not children: continue - # for child in children: - # yield Qube.make( - # key=child.key, - # values=QEnum(found_values), - # metadata=child.metadata, - # children=list(child), - # ) - print("WHAT NODES DID WE CREATE?") - print(Qube.make( - key=child.key, - values=QEnum(found_values), - metadata=child.metadata, - children=children, - )) - yield Qube.make( - key=child.key, - values=QEnum(found_values), - metadata=child.metadata, - children=children, - ) - - return Qube.root_node(list(_slice(q, request))) - - -print(modified_slice(new_q, request={ - "expver": ["0001"], - "param": ["1", "2", "3"], -})) + if len(found_values) > 1: + result.extend([Qube.make( + key=child.key, + values=QEnum(val), + metadata=child.metadata, + children=children, + ) for val in found_values]) + else: + result.extend([Qube.make( + key=child.key, + values=QEnum(found_values), + metadata=child.metadata, + children=children + )]) + + return result + + return Qube.root_node(_slice(q, polytopes_to_slice)) diff --git a/polytope_feature/engine/qubed_polytope_intersection.py b/polytope_feature/engine/qubed_polytope_intersection.py index 3585b3d63..4f8b1a7a4 100644 --- a/polytope_feature/engine/qubed_polytope_intersection.py +++ b/polytope_feature/engine/qubed_polytope_intersection.py @@ -1,75 +1,87 @@ -# from typing import List - -# from ..datacube.tensor_index_tree import TensorIndexTree -# from ..shapes import ConvexPolytope -# from ..utility.combinatorics import group, tensor_product -# from .engine import Engine -# from ..utility.list_tools import unique -# from qubed import Qube - - -# class QubedSlicing(Engine): -# def __init__(self): -# self.datacube = ?? - -# def create_fake_datacube_mappers(self): -# # TODO -# self.datacube_mappers = ?? 
-# pass - -# def create_request_polys(self, polytopes): -# for p in polytopes: -# self._unique_continuous_points(p) - -# groups, input_axes = group(polytopes) -# combinations = tensor_product(groups) -# return combinations - -# def _unique_continuous_points(self, p: ConvexPolytope): -# for i, ax in enumerate(p._axes): -# mapper = self.datacube_mappers.get(ax, None) -# for j, val in enumerate(p.points): -# p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) -# # Remove duplicate points -# unique(p.points) - -# def extract(self, datacube, polytopes: List[ConvexPolytope]): -# combinations = self.create_request_polys(polytopes) - -# request = Qube.empty() - -# for c in combinations: -# r = Qube.empty() -# new_c = [] -# for combi in c: -# if isinstance(combi, list): -# new_c.extend(combi) -# else: -# new_c.append(combi) -# r.set - -# # TODO: replace all the TensorIndexTrees with Qube trees - -# # request = TensorIndexTree() - -# # for c in combinations: -# # r = TensorIndexTree() -# # new_c = [] -# # for combi in c: -# # if isinstance(combi, list): -# # new_c.extend(combi) -# # else: -# # new_c.append(combi) -# # r["unsliced_polytopes"] = set(new_c) -# # current_nodes = [r] -# # for ax in datacube.axes.values(): -# # next_nodes = [] -# # interm_next_nodes = [] -# # for node in current_nodes: -# # self._build_branch(ax, node, datacube, interm_next_nodes) -# # next_nodes.extend(interm_next_nodes) -# # interm_next_nodes = [] -# # current_nodes = next_nodes - -# # request.merge(r) -# # return request +from typing import List + +from ..datacube.tensor_index_tree import TensorIndexTree +from ..shapes import ConvexPolytope +from ..utility.combinatorics import group, tensor_product +from .engine import Engine +from ..utility.list_tools import unique +from qubed import Qube + + +class QubedSlicing(Engine): + def __init__(self): + self.datacube = ?? + + def create_fake_datacube_mappers(self): + # TODO + self.datacube_mappers = ?? 
+ pass + + def create_request_polys(self, polytopes): + for p in polytopes: + self._unique_continuous_points(p) + + groups, input_axes = group(polytopes) + combinations = tensor_product(groups) + return combinations + + def _unique_continuous_points(self, p: ConvexPolytope): + for i, ax in enumerate(p._axes): + mapper = self.datacube_mappers.get(ax, None) + for j, val in enumerate(p.points): + p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) + # Remove duplicate points + unique(p.points) + + # def build_tree(self, combination): + + # unsliced_polytopes = set(combination) + + # def _build_tree(self, q: Qube): + # for child in q.children: + # # Find the axis object + # ax = self.datacube_mappers[child.key] + # self.build_branch() + + def extract(self, datacube, polytopes: List[ConvexPolytope]): + combinations = self.create_request_polys(polytopes) + + request = Qube.empty() + + for c in combinations: + new_c = [] + for combi in c: + if isinstance(combi, list): + new_c.extend(combi) + else: + new_c.append(combi) + + # r = build_tree # TODO + # pass + # r.set + + # TODO: replace all the TensorIndexTrees with Qube trees + + # request = TensorIndexTree() + + # for c in combinations: + # r = TensorIndexTree() + # new_c = [] + # for combi in c: + # if isinstance(combi, list): + # new_c.extend(combi) + # else: + # new_c.append(combi) + # r["unsliced_polytopes"] = set(new_c) + # current_nodes = [r] + # for ax in datacube.axes.values(): + # next_nodes = [] + # interm_next_nodes = [] + # for node in current_nodes: + # self._build_branch(ax, node, datacube, interm_next_nodes) + # next_nodes.extend(interm_next_nodes) + # interm_next_nodes = [] + # current_nodes = next_nodes + + # request.merge(r) + # return request diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py new file mode 100644 index 000000000..4a543056a --- /dev/null +++ b/tests/test_qubed_extraction.py @@ -0,0 +1,65 @@ +from qubed import Qube +import requests +from 
polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis + +from polytope_feature.shapes import ConvexPolytope + + +fdb_tree = Qube.from_json(requests.get( + "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) + +print(fdb_tree.axes().keys()) + + +combi_polytopes = [ + ConvexPolytope(["param"], [["168"]]), + ConvexPolytope(["time"], [["0000"], ["1200"]]), + ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["model"], ['ifs-nemo']), + ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], ["1"]), + ConvexPolytope(["expver"], [['0001']]), + ConvexPolytope(["experiment"], [['ssp3-7.0']]), + ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["activity"], [["scenariomip"]]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [["20190221", "20190223"]]) +] + +datacube_axes = {"param": UnsliceableDatacubeAxis(), + "time": PandasTimedeltaDatacubeAxis(), + "resolution": UnsliceableDatacubeAxis(), + "type": UnsliceableDatacubeAxis(), + "model": UnsliceableDatacubeAxis(), + "stream": UnsliceableDatacubeAxis(), + "realization": UnsliceableDatacubeAxis(), + "expver": UnsliceableDatacubeAxis(), + "experiment": UnsliceableDatacubeAxis(), + "generation": UnsliceableDatacubeAxis(), + "levtype": UnsliceableDatacubeAxis(), + "activity": UnsliceableDatacubeAxis(), + "dataset": UnsliceableDatacubeAxis(), + "class": UnsliceableDatacubeAxis(), + "date": PandasTimestampDatacubeAxis()} + + +# TODO: treat the transformations to talk to the qubed tree, maybe do it + +# TODO: start iterating fdb_tree and creating a new request tree + +print(fdb_tree.) 
+ + +# Select("step", [0]), +# Select("levtype", ["sfc"]), +# Select("date", [pd.Timestamp("20231102T000000")]), +# Select("domain", ["g"]), +# Select("expver", ["0001"]), +# Select("param", ["167"]), +# Select("class", ["od"]), +# Select("stream", ["oper"]), +# Select("type", ["fc"]), +# Box(["latitude", "longitude"], [0, 0], [80, 80]), From 422f4c13d2a4303c65603f66a31680bc6c3c823e Mon Sep 17 00:00:00 2001 From: mathleur Date: Fri, 28 Feb 2025 13:45:47 +0100 Subject: [PATCH 05/28] add qubed slice operation --- .../datacube/backends/test_qubed_slicing.py | 113 ++++++++++++------ tests/test_qubed_extraction.py | 2 +- 2 files changed, 80 insertions(+), 35 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 9ddf833fd..47e014234 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -1,6 +1,8 @@ from qubed import Qube from qubed.value_types import QEnum from typing import Iterator +from ...engine.hullslicer import slice +from copy import deepcopy # from ...shapes import ConvexPolytope q = Qube.from_dict({ @@ -192,48 +194,91 @@ def find_polytopes_on_axis(q: Qube, polytopes): polytopes_on_axis.append(poly) return polytopes_on_axis + def _axes_compressed(): + return {} + def _slice(q: Qube, polytopes) -> list[Qube]: result = [] for child in q.children: - # TODO: find polytopes which are defined on axis child.key + # find polytopes which are defined on axis child.key polytopes_on_axis = find_polytopes_on_axis(child, polytopes) - # TODO: for each polytope: + # for each polytope: for poly in polytopes_on_axis: - # TODO: find extents of polytope on child.key + # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) - # TODO: find values on child that are within extents + # find values on child that are within extents found_vals = [v for v in child.values if lower
<= v <= upper] - # TODO: slice polytope along each value on child and keep resulting polytopes in memory - - # TODO: remove polytope from the polytope list and append the sliced polytopes - # TODO: with these new polytopes, recurse and create children etc... - pass - requested_values = r.get(child.key, []) - found_values = [v for v in requested_values if v in child.values] - if not found_values: - continue - truncated_request = {k: v for k, v in r.items() if k != child.key} - children = _slice(child, truncated_request) - - # If this node used to have children but now has none due to filtering, skip it. - if child.children and not children: - continue - - if len(found_values) > 1: - result.extend([Qube.make( - key=child.key, - values=QEnum(val), - metadata=child.metadata, - children=children, - ) for val in found_values]) - else: - result.extend([Qube.make( - key=child.key, - values=QEnum(found_values), - metadata=child.metadata, - children=children - )]) + # slice polytope along each value on child and keep resulting polytopes in memory + sliced_polys = [] + for val in found_vals: + # slice polytope along the value and add sliced polytope to list of polytopes in memory + sliced_poly = slice(poly, child.key, val, slice_axis_idx) + sliced_polys.append(sliced_poly) + + # decide if axis should be compressed or not according to polytope + axis_compressed = _axes_compressed.get(child.key, False) + # if it's not compressed, need to separate into different nodes to append to the tree + if not axis_compressed and len(found_vals) > 1: + for i, found_val in enumerate(found_vals): + child_polytopes = deepcopy(polytopes) + child_polytopes.remove(poly) + child_polytopes.append(sliced_polys[i]) + children = _slice(child, child_polytopes) + # If this node used to have children but now has none due to filtering, skip it. 
+ if child.children and not children: + continue + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + qube_node = Qube.make(key=child.key, + values=QEnum(found_val), + metadata=child.metadata, + children=children) + result.append(qube_node) + else: + # if it's compressed, then can add all found values in a single node + child_polytopes = deepcopy(polytopes) + child_polytopes.remove(poly) + child_polytopes.extend(sliced_polys) + # create children + children = _slice(child, child_polytopes) + # If this node used to have children but now has none due to filtering, skip it. + if child.children and not children: + continue + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + result.extend([Qube.make( + key=child.key, + values=QEnum(found_vals), + metadata=child.metadata, + children=children + )]) return result return Qube.root_node(_slice(q, polytopes_to_slice)) + + +# TODO: OLD CODE TO MODIFY + # requested_values = r.get(child.key, []) + # found_values = [v for v in requested_values if v in child.values] + # if not found_values: + # continue + # truncated_request = {k: v for k, v in r.items() if k != child.key} + # children = _slice(child, truncated_request) + + # # If this node used to have children but now has none due to filtering, skip it. 
+ # if child.children and not children: + # continue + + # if len(found_values) > 1: + # result.extend([Qube.make( + # key=child.key, + # values=QEnum(val), + # metadata=child.metadata, + # children=children, + # ) for val in found_values]) + # else: + # result.extend([Qube.make( + # key=child.key, + # values=QEnum(found_values), + # metadata=child.metadata, + # children=children + # )]) diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index 4a543056a..4a57fee55 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -50,7 +50,7 @@ # TODO: start iterating fdb_tree and creating a new request tree -print(fdb_tree.) +# print(fdb_tree.) # Select("step", [0]), From e46258eaa60e0466a7489ea7f4315a15c9b51c65 Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 25 Mar 2025 16:20:04 +0100 Subject: [PATCH 06/28] include axes transformations and datacube axes in qubed slicing --- .../datacube/backends/test_qubed_slicing.py | 75 ++++++++++++++++--- tests/test_qubed_extraction.py | 17 ++++- 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 47e014234..961bf509a 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -3,6 +3,7 @@ from typing import Iterator from ...engine.hullslicer import slice from copy import deepcopy +from ..datacube_axis import UnsliceableDatacubeAxis # from ...shapes import ConvexPolytope q = Qube.from_dict({ @@ -60,7 +61,7 @@ # return Qube.root_node(list(_slice(q, request))) -def slice(q: Qube, request: dict) -> 'Qube': +def slice_new(q: Qube, request: dict) -> 'Qube': def _slice(q: Qube, r: dict) -> list[Qube]: result = [] for child in q.children: @@ -102,10 +103,10 @@ def _slice(q: Qube, r: dict) -> list[Qube]: } -print(q) -q = slice(q, request) +# print(q) +# q = slice(q, request) 
-print(q) +# print(q) # new_q = Qube.from_dict({ # "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}}, @@ -184,7 +185,7 @@ def _slice(q: Qube, r: dict) -> list[Qube]: # })) -def actual_slice(q: Qube, polytopes_to_slice) -> 'Qube': +def actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': def find_polytopes_on_axis(q: Qube, polytopes): polytopes_on_axis = [] @@ -192,35 +193,81 @@ def find_polytopes_on_axis(q: Qube, polytopes): for poly in polytopes: if axis_name in poly._axes: polytopes_on_axis.append(poly) + print("HERE NOW LOOK") + print(poly in polytopes) return polytopes_on_axis + def change_poly_axis_type(axis_name, polytopes, datacube_axes): + axis = datacube_axes[axis_name] + # TODO: loop through the polytopes and change each polytopes's values according to axis + if isinstance(axis, UnsliceableDatacubeAxis): + return + + for poly in polytopes: + i = 0 + for k, ax_name in enumerate(poly._axes): + if ax_name == axis_name: + i = k + for j, val in enumerate(poly.points): + poly.points[j][i] = axis.to_float(axis.parse(poly.points[j][i])) + def _axes_compressed(): return {} - def _slice(q: Qube, polytopes) -> list[Qube]: + def change_datacube_val_types(child: Qube, datacube_transformations): + axis_name = child.key + transformation = datacube_transformations.get(axis_name, None) + child_vals = child.values + new_vals = [] + for val in child_vals: + new_vals.append(transformation.transform_type(val)) + + return new_vals + + def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] for child in q.children: # find polytopes which are defined on axis child.key polytopes_on_axis = find_polytopes_on_axis(child, polytopes) + + # here now first change the values in the polytopes on the axis to reflect the axis type + change_poly_axis_type(child.key, polytopes_on_axis, datacube_axes) + # for each polytope: for poly in polytopes_on_axis: # find extents of polytope on child.key lower, 
upper, slice_axis_idx = poly.extents(child.key) # find values on child that are within extents - found_vals = [v for v in child.values if lower <= v <= upper] + print("WHAT ABOUT HERE WHAT DO WE COMPARE") + print(lower) + print(child.key) + print([v for v in child.values]) + # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation + modified_vals = change_datacube_val_types(child, datacube_transformations) + found_vals = [v for v in modified_vals if lower <= v <= upper] + # found_vals = [v for v in child.values if lower <= v <= upper] # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: # slice polytope along the value and add sliced polytope to list of polytopes in memory + print("WHERE DO WE SLICE AND WHAT") + print(val) + print(child.key) + print(poly.points) sliced_poly = slice(poly, child.key, val, slice_axis_idx) - sliced_polys.append(sliced_poly) + print("WHAT IS THE SLICED POLY??") + print(sliced_poly) + if sliced_poly: + sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope - axis_compressed = _axes_compressed.get(child.key, False) + axis_compressed = _axes_compressed().get(child.key, False) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: for i, found_val in enumerate(found_vals): child_polytopes = deepcopy(polytopes) + # print("AND NOW ARE THERE STILL POLYTOPES??") + # print(len(polytopes)) child_polytopes.remove(poly) child_polytopes.append(sliced_polys[i]) children = _slice(child, child_polytopes) @@ -235,11 +282,15 @@ def _slice(q: Qube, polytopes) -> list[Qube]: result.append(qube_node) else: # if it's compressed, then can add all found values in a single node + print("AND NOW ARE THERE STILL POLYTOPES??") + print(len(polytopes)) + print(poly in deepcopy(polytopes)) + polytopes.remove(poly) 
child_polytopes = deepcopy(polytopes) - child_polytopes.remove(poly) + # child_polytopes.remove(poly) child_polytopes.extend(sliced_polys) # create children - children = _slice(child, child_polytopes) + children = _slice(child, child_polytopes, datacube_axes) # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: continue @@ -253,7 +304,7 @@ def _slice(q: Qube, polytopes) -> list[Qube]: return result - return Qube.root_node(_slice(q, polytopes_to_slice)) + return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) # TODO: OLD CODE TO MODIFY diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index 4a57fee55..209d37a9f 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -1,6 +1,9 @@ from qubed import Qube import requests from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis +from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta +import pandas as pd from polytope_feature.shapes import ConvexPolytope @@ -26,7 +29,7 @@ ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [["20190221", "20190223"]]) + ConvexPolytope(["date"], [[pd.Timestamp("20190221"), pd.Timestamp("20190223")]]) ] datacube_axes = {"param": UnsliceableDatacubeAxis(), @@ -45,6 +48,18 @@ "class": UnsliceableDatacubeAxis(), "date": PandasTimestampDatacubeAxis()} +time_val = ? 
+date_val = pd.Timestamp("20300101T000000") + +datacube_transformations = { + "time": TypeChangeStrToTimedelta("time", time_val), + "date": TypeChangeStrToTimestamp("date", date_val) +} + + +sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) + +print(sliced_tree) # TODO: treat the transformations to talk to the qubed tree, maybe do it From 6251edb9d91b2e1297cc2b41524efe43068dfe4b Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 25 Mar 2025 16:52:57 +0100 Subject: [PATCH 07/28] WIP --- .../datacube/backends/test_qubed_slicing.py | 21 +++++++++++++++++-- tests/test_qubed_extraction.py | 6 +++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 961bf509a..ef0585665 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -220,10 +220,23 @@ def change_datacube_val_types(child: Qube, datacube_transformations): child_vals = child.values new_vals = [] for val in child_vals: - new_vals.append(transformation.transform_type(val)) + if transformation: + new_vals.append(transformation.transform_type(val)) + else: + new_vals.append(val) return new_vals + def transform_upper_lower(axis_name, lower, upper, datacube_axes): + ax = datacube_axes[axis_name] + if isinstance(ax, UnsliceableDatacubeAxis): + return (lower, upper) + tol = ax.tol + lower = ax.from_float(lower - tol) + upper = ax.from_float(upper + tol) + + return (lower, upper) + def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] for child in q.children: @@ -244,6 +257,10 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ print([v for v in child.values]) # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation modified_vals = 
change_datacube_val_types(child, datacube_transformations) + + # here use the axis to transform lower and upper to right type too + lower, upper = transform_upper_lower(child.key, lower, upper, datacube_axes) + found_vals = [v for v in modified_vals if lower <= v <= upper] # found_vals = [v for v in child.values if lower <= v <= upper] # slice polytope along each value on child and keep resulting polytopes in memory @@ -290,7 +307,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # child_polytopes.remove(poly) child_polytopes.extend(sliced_polys) # create children - children = _slice(child, child_polytopes, datacube_axes) + children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: continue diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index 209d37a9f..aa88ac9e3 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -16,7 +16,7 @@ combi_polytopes = [ ConvexPolytope(["param"], [["168"]]), - ConvexPolytope(["time"], [["0000"], ["1200"]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), ConvexPolytope(["resolution"], [["high"]]), ConvexPolytope(["type"], [["fc"]]), ConvexPolytope(["model"], ['ifs-nemo']), @@ -29,7 +29,7 @@ ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20190221"), pd.Timestamp("20190223")]]) + ConvexPolytope(["date"], [[pd.Timestamp("20190221")], [pd.Timestamp("20190223")]]) ] datacube_axes = {"param": UnsliceableDatacubeAxis(), @@ -48,7 +48,7 @@ "class": UnsliceableDatacubeAxis(), "date": PandasTimestampDatacubeAxis()} -time_val = ? 
+time_val = pd.Timedelta(hours=0, minutes=0) date_val = pd.Timestamp("20300101T000000") datacube_transformations = { From df39acea61f65a61f552214f2a388de699057e70 Mon Sep 17 00:00:00 2001 From: mathleur Date: Wed, 26 Mar 2025 11:21:11 +0100 Subject: [PATCH 08/28] WIP --- .../datacube/backends/test_qubed_slicing.py | 79 +++++++++++++------ tests/test_qubed_extraction.py | 6 +- 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index ef0585665..f23bdac86 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -193,8 +193,6 @@ def find_polytopes_on_axis(q: Qube, polytopes): for poly in polytopes: if axis_name in poly._axes: polytopes_on_axis.append(poly) - print("HERE NOW LOOK") - print(poly in polytopes) return polytopes_on_axis def change_poly_axis_type(axis_name, polytopes, datacube_axes): @@ -244,50 +242,71 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ polytopes_on_axis = find_polytopes_on_axis(child, polytopes) # here now first change the values in the polytopes on the axis to reflect the axis type - change_poly_axis_type(child.key, polytopes_on_axis, datacube_axes) + # change_poly_axis_type(child.key, polytopes_on_axis, datacube_axes) # for each polytope: for poly in polytopes_on_axis: # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) # find values on child that are within extents - print("WHAT ABOUT HERE WHAT DO WE COMPARE") - print(lower) - print(child.key) - print([v for v in child.values]) + # print("WHAT ABOUT HERE WHAT DO WE COMPARE") + # print(lower) + # print(child.key) + # print([v for v in child.values]) # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation modified_vals = change_datacube_val_types(child, 
datacube_transformations) # here use the axis to transform lower and upper to right type too - lower, upper = transform_upper_lower(child.key, lower, upper, datacube_axes) + print("WHAT WERE UPPER AND LOWER BEFORE") + print((lower, upper)) + print(poly._axes) + print(poly.points) + new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube_axes) + + print("WHAT ARE THE FOUND VALS???") + + found_vals = [v for v in modified_vals if new_lower <= v <= new_upper] + + print(modified_vals) + print((new_lower, new_upper)) + print(found_vals) + + if len(found_vals) == 0: + continue - found_vals = [v for v in modified_vals if lower <= v <= upper] # found_vals = [v for v in child.values if lower <= v <= upper] # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: - # slice polytope along the value and add sliced polytope to list of polytopes in memory - print("WHERE DO WE SLICE AND WHAT") - print(val) - print(child.key) - print(poly.points) - sliced_poly = slice(poly, child.key, val, slice_axis_idx) - print("WHAT IS THE SLICED POLY??") - print(sliced_poly) - if sliced_poly: - sliced_polys.append(sliced_poly) + ax = datacube_axes[child.key] + if not isinstance(ax, UnsliceableDatacubeAxis): + fval = ax.to_float(val) + # slice polytope along the value and add sliced polytope to list of polytopes in memory + # print("WHERE DO WE SLICE AND WHAT") + # print(val) + # print(child.key) + # print(poly.points) + sliced_poly = slice(poly, child.key, fval, slice_axis_idx) + print("WHAT IS THE SLICED POLY??") + print(sliced_poly) + # if sliced_poly: + if True: + sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope axis_compressed = _axes_compressed().get(child.key, False) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: for i, found_val in enumerate(found_vals): + # TODO: 
before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... so we can't just remove here in theory + polytopes.remove(poly) child_polytopes = deepcopy(polytopes) # print("AND NOW ARE THERE STILL POLYTOPES??") # print(len(polytopes)) - child_polytopes.remove(poly) - child_polytopes.append(sliced_polys[i]) - children = _slice(child, child_polytopes) + # child_polytopes.remove(poly) + if sliced_polys[i]: + child_polytopes.append(sliced_polys[i]) + children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: continue @@ -299,18 +318,22 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ result.append(qube_node) else: # if it's compressed, then can add all found values in a single node - print("AND NOW ARE THERE STILL POLYTOPES??") - print(len(polytopes)) - print(poly in deepcopy(polytopes)) + # print("AND NOW ARE THERE STILL POLYTOPES??") + # print(len(polytopes)) + # print(poly in deepcopy(polytopes)) polytopes.remove(poly) child_polytopes = deepcopy(polytopes) # child_polytopes.remove(poly) child_polytopes.extend(sliced_polys) # create children children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) + # print(" WHAT ARE THE CHILD VALUES AT THE END??") + # print(child.key) + # print(found_vals) # If this node used to have children but now has none due to filtering, skip it. 
if child.children and not children: continue + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing result.extend([Qube.make( key=child.key, @@ -321,6 +344,12 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ return result + # TODO: change the polytope point types here + for polytope in polytopes_to_slice: + for axis in polytope._axes: + + change_poly_axis_type(axis, [polytope], datacube_axes) + return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index aa88ac9e3..9989eace8 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -19,7 +19,7 @@ ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), ConvexPolytope(["resolution"], [["high"]]), ConvexPolytope(["type"], [["fc"]]), - ConvexPolytope(["model"], ['ifs-nemo']), + ConvexPolytope(["model"], [['ifs-nemo']]), ConvexPolytope(["stream"], [["clte"]]), ConvexPolytope(["realization"], ["1"]), ConvexPolytope(["expver"], [['0001']]), @@ -29,7 +29,7 @@ ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20190221")], [pd.Timestamp("20190223")]]) + ConvexPolytope(["date"], [[pd.Timestamp("20210728")], [pd.Timestamp("20210729")]]) ] datacube_axes = {"param": UnsliceableDatacubeAxis(), @@ -59,6 +59,8 @@ sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) + +print("THE FINAL RESULT IS") print(sliced_tree) # TODO: treat the transformations to talk to the qubed tree, maybe do it From ac32f007ddb4a26561346c02e11eb9523af1c506 Mon Sep 17 00:00:00 2001 From: mathleur Date: Wed, 26 Mar 2025 13:50:43 +0100 Subject: [PATCH 09/28] make qubed test work --- .../datacube/backends/test_qubed_slicing.py | 11 
++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index f23bdac86..3a0297de9 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -3,6 +3,7 @@ from typing import Iterator from ...engine.hullslicer import slice from copy import deepcopy +import pandas as pd from ..datacube_axis import UnsliceableDatacubeAxis # from ...shapes import ConvexPolytope @@ -287,8 +288,8 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # print(child.key) # print(poly.points) sliced_poly = slice(poly, child.key, fval, slice_axis_idx) - print("WHAT IS THE SLICED POLY??") - print(sliced_poly) + # print("WHAT IS THE SLICED POLY??") + # print(sliced_poly) # if sliced_poly: if True: sliced_polys.append(sliced_poly) @@ -297,9 +298,10 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ axis_compressed = _axes_compressed().get(child.key, False) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: + polytopes.remove(poly) for i, found_val in enumerate(found_vals): # TODO: before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... 
so we can't just remove here in theory - polytopes.remove(poly) + # polytopes.remove(poly) child_polytopes = deepcopy(polytopes) # print("AND NOW ARE THERE STILL POLYTOPES??") # print(len(polytopes)) @@ -311,6 +313,9 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ if child.children and not children: continue # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + # print(type(found_val)) + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + found_val = [str(found_val)] qube_node = Qube.make(key=child.key, values=QEnum(found_val), metadata=child.metadata, From 836f92324ac43148b2f8c94df6fc25baf7913e18 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 3 Apr 2025 17:13:34 +0200 Subject: [PATCH 10/28] fix bugs --- .../datacube/backends/test_qubed_slicing.py | 3 ++- tests/test_qubed_extraction.py | 27 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 3a0297de9..40db9001f 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -292,6 +292,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # print(sliced_poly) # if sliced_poly: if True: + # if sliced_poly: sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope @@ -329,7 +330,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ polytopes.remove(poly) child_polytopes = deepcopy(polytopes) # child_polytopes.remove(poly) - child_polytopes.extend(sliced_polys) + child_polytopes.extend([sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) # create children children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) # print(" WHAT 
ARE THE CHILD VALUES AT THE END??") diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index 9989eace8..cf0546cc0 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -11,11 +11,34 @@ fdb_tree = Qube.from_json(requests.get( "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) + +# fdb_tree = fdb_tree.remove_by_key(["year"]).remove_by_key(["month"]) + +fdb_tree.print() + print(fdb_tree.axes().keys()) +# combi_polytopes = [ +# ConvexPolytope(["param"], [["168"]]), +# ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), +# ConvexPolytope(["resolution"], [["high"]]), +# ConvexPolytope(["type"], [["fc"]]), +# ConvexPolytope(["model"], [['ifs-nemo']]), +# ConvexPolytope(["stream"], [["clte"]]), +# ConvexPolytope(["realization"], ["1"]), +# ConvexPolytope(["expver"], [['0001']]), +# ConvexPolytope(["experiment"], [['ssp3-7.0']]), +# ConvexPolytope(["generation"], [["1"]]), +# ConvexPolytope(["levtype"], [["sfc"]]), +# ConvexPolytope(["activity"], [["scenariomip"]]), +# ConvexPolytope(["dataset"], [["climate-dt"]]), +# ConvexPolytope(["class"], [["d1"]]), +# ConvexPolytope(["date"], [[pd.Timestamp("20210728")], [pd.Timestamp("20210729")]]) +# ] + combi_polytopes = [ - ConvexPolytope(["param"], [["168"]]), + ConvexPolytope(["param"], [["164"]]), ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), ConvexPolytope(["resolution"], [["high"]]), ConvexPolytope(["type"], [["fc"]]), @@ -29,7 +52,7 @@ ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20210728")], [pd.Timestamp("20210729")]]) + ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20230812")]]) ] datacube_axes = {"param": UnsliceableDatacubeAxis(), From 
a4fd59726ad7c4a040e25aac770065d71354ba60 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 10 Apr 2025 14:14:54 +0200 Subject: [PATCH 11/28] clean up --- .gitignore | 3 +- .../datacube/backends/test_qubed_slicing.py | 43 +------------------ 2 files changed, 4 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index 742930a33..a4face77a 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,5 @@ new_polytope_venv *.json venv_python3_11 *.txt -tests/data \ No newline at end of file +tests/data +venv_gj_iterator \ No newline at end of file diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 40db9001f..8427ae8cd 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -243,39 +243,22 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ polytopes_on_axis = find_polytopes_on_axis(child, polytopes) # here now first change the values in the polytopes on the axis to reflect the axis type - # change_poly_axis_type(child.key, polytopes_on_axis, datacube_axes) # for each polytope: for poly in polytopes_on_axis: # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) # find values on child that are within extents - # print("WHAT ABOUT HERE WHAT DO WE COMPARE") - # print(lower) - # print(child.key) - # print([v for v in child.values]) # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation modified_vals = change_datacube_val_types(child, datacube_transformations) # here use the axis to transform lower and upper to right type too - print("WHAT WERE UPPER AND LOWER BEFORE") - print((lower, upper)) - print(poly._axes) - print(poly.points) new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube_axes) - - print("WHAT ARE THE FOUND VALS???") - found_vals = [v 
for v in modified_vals if new_lower <= v <= new_upper] - print(modified_vals) - print((new_lower, new_upper)) - print(found_vals) - if len(found_vals) == 0: continue - # found_vals = [v for v in child.values if lower <= v <= upper] # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: @@ -283,17 +266,8 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ if not isinstance(ax, UnsliceableDatacubeAxis): fval = ax.to_float(val) # slice polytope along the value and add sliced polytope to list of polytopes in memory - # print("WHERE DO WE SLICE AND WHAT") - # print(val) - # print(child.key) - # print(poly.points) sliced_poly = slice(poly, child.key, fval, slice_axis_idx) - # print("WHAT IS THE SLICED POLY??") - # print(sliced_poly) - # if sliced_poly: - if True: - # if sliced_poly: - sliced_polys.append(sliced_poly) + sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope axis_compressed = _axes_compressed().get(child.key, False) @@ -302,11 +276,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ polytopes.remove(poly) for i, found_val in enumerate(found_vals): # TODO: before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... 
so we can't just remove here in theory - # polytopes.remove(poly) child_polytopes = deepcopy(polytopes) - # print("AND NOW ARE THERE STILL POLYTOPES??") - # print(len(polytopes)) - # child_polytopes.remove(poly) if sliced_polys[i]: child_polytopes.append(sliced_polys[i]) children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) @@ -314,7 +284,6 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ if child.children and not children: continue # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing - # print(type(found_val)) if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): found_val = [str(found_val)] qube_node = Qube.make(key=child.key, @@ -324,18 +293,11 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ result.append(qube_node) else: # if it's compressed, then can add all found values in a single node - # print("AND NOW ARE THERE STILL POLYTOPES??") - # print(len(polytopes)) - # print(poly in deepcopy(polytopes)) polytopes.remove(poly) child_polytopes = deepcopy(polytopes) - # child_polytopes.remove(poly) child_polytopes.extend([sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) # create children children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) - # print(" WHAT ARE THE CHILD VALUES AT THE END??") - # print(child.key) - # print(found_vals) # If this node used to have children but now has none due to filtering, skip it. 
if child.children and not children: continue @@ -350,10 +312,9 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ return result - # TODO: change the polytope point types here + # change the polytope point types here for polytope in polytopes_to_slice: for axis in polytope._axes: - change_poly_axis_type(axis, [polytope], datacube_axes) return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) From b4e52e5936f3a5ff9d808316d9991bbea6ea8de6 Mon Sep 17 00:00:00 2001 From: mathleur Date: Wed, 16 Apr 2025 16:57:54 +0200 Subject: [PATCH 12/28] WIP --- .gitignore | 3 +- .../datacube/backends/test_qubed_slicing.py | 9 +- .../engine/qubed_polytope_intersection.py | 8 + polytope_feature/engine/qubed_slicer.py | 181 ++++++++++++++++++ 4 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 polytope_feature/engine/qubed_slicer.py diff --git a/.gitignore b/.gitignore index a4face77a..69063359a 100644 --- a/.gitignore +++ b/.gitignore @@ -30,4 +30,5 @@ new_polytope_venv venv_python3_11 *.txt tests/data -venv_gj_iterator \ No newline at end of file +venv_gj_iterator +new_venv_gj_iterator \ No newline at end of file diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 8427ae8cd..cc9a06982 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -238,6 +238,10 @@ def transform_upper_lower(axis_name, lower, upper, datacube_axes): def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] + + if len(q.children) == 0: + # TODO: add "fake" axes and their nodes in order -> what about merged axes?? 
+ pass for child in q.children: # find polytopes which are defined on axis child.key polytopes_on_axis = find_polytopes_on_axis(child, polytopes) @@ -283,9 +287,12 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: continue - # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing? if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): found_val = [str(found_val)] + + # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here + # and instead keep/cache the value to merge with the node from before?? qube_node = Qube.make(key=child.key, values=QEnum(found_val), metadata=child.metadata, diff --git a/polytope_feature/engine/qubed_polytope_intersection.py b/polytope_feature/engine/qubed_polytope_intersection.py index 4f8b1a7a4..14f275c59 100644 --- a/polytope_feature/engine/qubed_polytope_intersection.py +++ b/polytope_feature/engine/qubed_polytope_intersection.py @@ -7,6 +7,14 @@ from ..utility.list_tools import unique from qubed import Qube +from qubed import Qube +from qubed.value_types import QEnum +from typing import Iterator +from ...engine.hullslicer import slice +from copy import deepcopy +import pandas as pd +from ..datacube_axis import UnsliceableDatacubeAxis + class QubedSlicing(Engine): def __init__(self): diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py new file mode 100644 index 000000000..297b40ab8 --- /dev/null +++ b/polytope_feature/engine/qubed_slicer.py @@ -0,0 +1,181 @@ + +import math +from copy import copy +from itertools import chain +from typing import List + +import scipy.spatial + +from ..datacube.backends.datacube import Datacube +from 
..datacube.datacube_axis import UnsliceableDatacubeAxis +from ..datacube.tensor_index_tree import TensorIndexTree +from ..shapes import ConvexPolytope, Product +from ..utility.combinatorics import group, tensor_product +from ..utility.exceptions import UnsliceableShapeError +from ..utility.geometry import lerp +from ..utility.list_tools import argmax, argmin, unique +from .engine import Engine + +from qubed import Qube +from qubed.value_types import QEnum +from typing import Iterator +from ...engine.hullslicer import slice +from copy import deepcopy +import pandas as pd +from ..datacube_axis import UnsliceableDatacubeAxis + + +class QubedSlicer(Engine): + def __init__(self): + self.ax_is_unsliceable = {} + self.axis_values_between = {} + self.has_value = {} + self.sliced_polytopes = {} + self.remapped_vals = {} + self.compressed_axes = [] + + # TODO: assert that the associated datacube is an FDB one + + # TODO: change functions to reuse same methods as other slicers + + # TODO: get the transformations + datacube axes from the datacube now + + # TODO: separate the extract into the combinations and then do the slicing for each combination + + def find_polytope_combinations(self): + pass + + def actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': + # TODO: when there is a qubed leaf, make sure to add/consider the mapper grid axes too + + def find_polytopes_on_axis(q: Qube, polytopes): + polytopes_on_axis = [] + axis_name = q.key + for poly in polytopes: + if axis_name in poly._axes: + polytopes_on_axis.append(poly) + return polytopes_on_axis + + def change_poly_axis_type(axis_name, polytopes, datacube_axes): + axis = datacube_axes[axis_name] + # TODO: loop through the polytopes and change each polytopes's values according to axis + if isinstance(axis, UnsliceableDatacubeAxis): + return + + for poly in polytopes: + i = 0 + for k, ax_name in enumerate(poly._axes): + if ax_name == axis_name: + i = k + for j, val in 
enumerate(poly.points): + poly.points[j][i] = axis.to_float(axis.parse(poly.points[j][i])) + + def _axes_compressed(): + return {} + + def change_datacube_val_types(child: Qube, datacube_transformations): + axis_name = child.key + transformation = datacube_transformations.get(axis_name, None) + child_vals = child.values + new_vals = [] + for val in child_vals: + if transformation: + new_vals.append(transformation.transform_type(val)) + else: + new_vals.append(val) + + return new_vals + + def transform_upper_lower(axis_name, lower, upper, datacube_axes): + ax = datacube_axes[axis_name] + if isinstance(ax, UnsliceableDatacubeAxis): + return (lower, upper) + tol = ax.tol + lower = ax.from_float(lower - tol) + upper = ax.from_float(upper + tol) + + return (lower, upper) + + def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: + result = [] + for child in q.children: + # find polytopes which are defined on axis child.key + polytopes_on_axis = find_polytopes_on_axis(child, polytopes) + + # here now first change the values in the polytopes on the axis to reflect the axis type + + # for each polytope: + for poly in polytopes_on_axis: + # find extents of polytope on child.key + lower, upper, slice_axis_idx = poly.extents(child.key) + # find values on child that are within extents + # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation + modified_vals = change_datacube_val_types(child, datacube_transformations) + + # here use the axis to transform lower and upper to right type too + new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube_axes) + found_vals = [v for v in modified_vals if new_lower <= v <= new_upper] + + if len(found_vals) == 0: + continue + + # slice polytope along each value on child and keep resulting polytopes in memory + sliced_polys = [] + for val in found_vals: + ax = datacube_axes[child.key] + if not isinstance(ax, 
UnsliceableDatacubeAxis): + fval = ax.to_float(val) + # slice polytope along the value and add sliced polytope to list of polytopes in memory + sliced_poly = slice(poly, child.key, fval, slice_axis_idx) + sliced_polys.append(sliced_poly) + + # decide if axis should be compressed or not according to polytope + axis_compressed = _axes_compressed().get(child.key, False) + # if it's not compressed, need to separate into different nodes to append to the tree + if not axis_compressed and len(found_vals) > 1: + polytopes.remove(poly) + for i, found_val in enumerate(found_vals): + # TODO: before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... so we can't just remove here in theory + child_polytopes = deepcopy(polytopes) + if sliced_polys[i]: + child_polytopes.append(sliced_polys[i]) + children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) + # If this node used to have children but now has none due to filtering, skip it. + if child.children and not children: + continue + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing? + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + found_val = [str(found_val)] + qube_node = Qube.make(key=child.key, + values=QEnum(found_val), + metadata=child.metadata, + children=children) + result.append(qube_node) + else: + # if it's compressed, then can add all found values in a single node + polytopes.remove(poly) + child_polytopes = deepcopy(polytopes) + child_polytopes.extend( + [sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) + # create children + children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) + # If this node used to have children but now has none due to filtering, skip it. 
+ if child.children and not children: + continue + + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + result.extend([Qube.make( + key=child.key, + values=QEnum(found_vals), + metadata=child.metadata, + children=children + )]) + + return result + + # change the polytope point types here + for polytope in polytopes_to_slice: + for axis in polytope._axes: + change_poly_axis_type(axis, [polytope], datacube_axes) + + return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) From a418337136e42aea2e5671403a4375c14013f814 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 17 Apr 2025 11:06:38 +0200 Subject: [PATCH 13/28] fix slicing polytopes across several child nodes --- .../datacube/backends/test_qubed_slicing.py | 46 +++++++++++++++---- tests/test_qubed_extraction.py | 2 +- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index cc9a06982..c78f4ab69 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -236,13 +236,17 @@ def transform_upper_lower(axis_name, lower, upper, datacube_axes): return (lower, upper) + def find_grid_axes(): + # TODO: handle grid axes + pass + def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] - if len(q.children) == 0: - # TODO: add "fake" axes and their nodes in order -> what about merged axes?? - pass - for child in q.children: + # if len(q.children) == 0: + # # TODO: add "fake" axes and their nodes in order -> what about merged axes?? 
+ # pass + for i, child in enumerate(q.children): # find polytopes which are defined on axis child.key polytopes_on_axis = find_polytopes_on_axis(child, polytopes) @@ -252,6 +256,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ for poly in polytopes_on_axis: # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) + # find values on child that are within extents # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation modified_vals = change_datacube_val_types(child, datacube_transformations) @@ -272,15 +277,16 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # slice polytope along the value and add sliced polytope to list of polytopes in memory sliced_poly = slice(poly, child.key, fval, slice_axis_idx) sliced_polys.append(sliced_poly) - # decide if axis should be compressed or not according to polytope axis_compressed = _axes_compressed().get(child.key, False) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: - polytopes.remove(poly) + # TODO: if we have gone through all children, then can remove poly from list completely + # polytopes.remove(poly) for i, found_val in enumerate(found_vals): # TODO: before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... 
so we can't just remove here in theory - child_polytopes = deepcopy(polytopes) + # child_polytopes = deepcopy(polytopes) + child_polytopes = [p for p in polytopes if p != poly] if sliced_polys[i]: child_polytopes.append(sliced_polys[i]) children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) @@ -293,6 +299,11 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here # and instead keep/cache the value to merge with the node from before?? + + print("HERE LOOK") + print(child.key) + print(found_val) + # print(children) qube_node = Qube.make(key=child.key, values=QEnum(found_val), metadata=child.metadata, @@ -300,8 +311,9 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ result.append(qube_node) else: # if it's compressed, then can add all found values in a single node - polytopes.remove(poly) - child_polytopes = deepcopy(polytopes) + # polytopes.remove(poly) + # child_polytopes = deepcopy(polytopes) + child_polytopes = [p for p in polytopes if p != poly] child_polytopes.extend([sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) # create children children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) @@ -309,10 +321,24 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ if child.children and not children: continue + new_found_vals = [] + for found_val in found_vals: + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + print("DIDNT WE GO HERE?") + print(found_val) + print(str(found_val)) + # found_val = [str(found_val)] + new_found_vals.append(str(found_val)) + else: + new_found_vals.append(found_val) + + print("WHAT ABOUT HERE?") + print(found_vals) + print(new_found_vals) # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before 
passing result.extend([Qube.make( key=child.key, - values=QEnum(found_vals), + values=QEnum(new_found_vals), metadata=child.metadata, children=children )]) diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index cf0546cc0..e78197a11 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -52,7 +52,7 @@ ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20230812")]]) + ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]) ] datacube_axes = {"param": UnsliceableDatacubeAxis(), From 3afaa2e6755292ef89af167a4c5c9a8a5aa752a0 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 17 Apr 2025 12:02:19 +0200 Subject: [PATCH 14/28] start adding grid axes --- .../datacube/backends/test_qubed_slicing.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index c78f4ab69..3ca029600 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -5,6 +5,7 @@ from copy import deepcopy import pandas as pd from ..datacube_axis import UnsliceableDatacubeAxis +from ..transformations.datacube_mappers.datacube_mappers import DatacubeMapper # from ...shapes import ConvexPolytope q = Qube.from_dict({ @@ -188,9 +189,9 @@ def _slice(q: Qube, r: dict) -> list[Qube]: def actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': - def find_polytopes_on_axis(q: Qube, polytopes): + def find_polytopes_on_axis(axis_name, polytopes): polytopes_on_axis = [] - axis_name = q.key + # axis_name = q.key for poly in polytopes: if axis_name in poly._axes: polytopes_on_axis.append(poly) @@ -243,12 +244,27 @@ def 
find_grid_axes(): def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] - # if len(q.children) == 0: - # # TODO: add "fake" axes and their nodes in order -> what about merged axes?? - # pass + if len(q.children) == 0: + # TODO: add "fake" axes and their nodes in order -> what about merged axes?? + mapper_transformation = None + for transformation in datacube_transformations: + if isinstance(transformation, DatacubeMapper): + mapper_transformation = transformation + if not mapper_transformation: + # There is no grid mapping + pass + else: + # TODO: Slice on the two grid axes + grid_axes = mapper_transformation._final_mapped_axes + + # TODO: Handle first grid axis + polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes) + + pass + pass for i, child in enumerate(q.children): # find polytopes which are defined on axis child.key - polytopes_on_axis = find_polytopes_on_axis(child, polytopes) + polytopes_on_axis = find_polytopes_on_axis(child.key, polytopes) # here now first change the values in the polytopes on the axis to reflect the axis type From f2ada06f0d98154a3fd6fba0088ed9af5e93a32e Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 17 Apr 2025 15:46:13 +0200 Subject: [PATCH 15/28] finish adding prototype of grid axes --- .../datacube/backends/test_qubed_slicing.py | 136 +++++++++++++++++- .../mapper_types/healpix_nested.py | 91 ++++++------ .../engine/qubed_polytope_intersection.py | 8 -- tests/test_qubed_extraction.py | 17 ++- 4 files changed, 193 insertions(+), 59 deletions(-) diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 3ca029600..6ca3b8d93 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -241,13 +241,60 @@ def find_grid_axes(): # TODO: handle grid axes pass + def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, 
datacube_transformations, second_axis_vals) -> list[Qube]: + result = [] + polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes) + + for poly in polytopes_on_axis: + lower, upper, slice_axis_idx = poly.extents(axis_name) + + new_lower, new_upper = transform_upper_lower(axis_name, lower, upper, datacube_axes) + found_vals = [v for v in second_axis_vals if new_lower <= v <= new_upper] + + if len(found_vals) == 0: + continue + + # slice polytope along each value on child and keep resulting polytopes in memory + sliced_polys = [] + for val in found_vals: + ax = datacube_axes[axis_name] + if not isinstance(ax, UnsliceableDatacubeAxis): + fval = ax.to_float(val) + # slice polytope along the value and add sliced polytope to list of polytopes in memory + sliced_poly = slice(poly, axis_name, fval, slice_axis_idx) + sliced_polys.append(sliced_poly) + # decide if axis should be compressed or not according to polytope + # NOTE: actually the second grid axis will always be compressed + # axis_compressed = _axes_compressed().get(axis_name, True) + + # if it's not compressed, need to separate into different nodes to append to the tree + + new_found_vals = [] + for found_val in found_vals: + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + new_found_vals.append(str(found_val)) + else: + new_found_vals.append(found_val) + + # NOTE this was the last axis so we do not have children... + + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + result.extend([Qube.make( + key=axis_name, + values=QEnum(new_found_vals), + metadata={}, + children={} + )]) + return result + # pass + def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] if len(q.children) == 0: # TODO: add "fake" axes and their nodes in order -> what about merged axes?? 
mapper_transformation = None - for transformation in datacube_transformations: + for transformation in list(datacube_transformations.values()): if isinstance(transformation, DatacubeMapper): mapper_transformation = transformation if not mapper_transformation: @@ -255,13 +302,94 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ pass else: # TODO: Slice on the two grid axes - grid_axes = mapper_transformation._final_mapped_axes + grid_axes = mapper_transformation._mapped_axes # TODO: Handle first grid axis polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes) - pass - pass + for poly in polytopes_on_axis: + lower, upper, slice_axis_idx = poly.extents(grid_axes[0]) + + first_ax_vals = mapper_transformation.first_axis_vals() + + new_lower, new_upper = transform_upper_lower(grid_axes[0], lower, upper, datacube_axes) + found_vals = [v for v in first_ax_vals if new_lower <= v <= new_upper] + + if len(found_vals) == 0: + continue + + # slice polytope along each value on child and keep resulting polytopes in memory + sliced_polys = [] + for val in found_vals: + ax = datacube_axes[grid_axes[0]] + if not isinstance(ax, UnsliceableDatacubeAxis): + fval = ax.to_float(val) + # slice polytope along the value and add sliced polytope to list of polytopes in memory + sliced_poly = slice(poly, grid_axes[0], fval, slice_axis_idx) + sliced_polys.append(sliced_poly) + # decide if axis should be compressed or not according to polytope + # NOTE: actually the first grid axis will never be compressed + axis_compressed = _axes_compressed().get(grid_axes[0], False) + + # if it's not compressed, need to separate into different nodes to append to the tree + # if not axis_compressed and len(found_vals) > 1: + if True: + # TODO: if we have gone through all children, then can remove poly from list completely + # polytopes.remove(poly) + for i, found_val in enumerate(found_vals): + # TODO: before removing polytope here actually, we should be careful 
that all the values in the polytope are on this branch... so we can't just remove here in theory + # child_polytopes = deepcopy(polytopes) + child_polytopes = [p for p in polytopes if p != poly] + if sliced_polys[i]: + child_polytopes.append(sliced_polys[i]) + + second_axis_vals = mapper_transformation.second_axis_vals([found_val]) + + # TODO: get second axis children through slicing + children = _slice_second_grid_axis( + grid_axes[1], child_polytopes, datacube_axes, datacube_transformations, second_axis_vals) + # If this node used to have children but now has none due to filtering, skip it. + if not children: + continue + # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing? + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + found_val = [str(found_val)] + + # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here + # and instead keep/cache the value to merge with the node from before?? + + qube_node = Qube.make(key=grid_axes[0], + values=QEnum([found_val]), + metadata={}, + children=children) + result.append(qube_node) + # else: + + # child_polytopes = [p for p in polytopes if p != poly] + # child_polytopes.extend( + # [sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) + # # create children + # children = _slice_second_grid_axis( + # grid_axes[1], child_polytopes, datacube_axes, datacube_transformations) + # # If this node used to have children but now has none due to filtering, skip it. 
+ # if not children: + # continue + + # new_found_vals = [] + # for found_val in found_vals: + # if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + # new_found_vals.append(str(found_val)) + # else: + # new_found_vals.append(found_val) + + # # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + # result.extend([Qube.make( + # key=grid_axes[0], + # values=QEnum(new_found_vals), + # metadata={}, + # children=children + # )]) + for i, child in enumerate(q.children): # find polytopes which are defined on axis child.key polytopes_on_axis = find_polytopes_on_axis(child.key, polytopes) diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py index da3f0f1ae..36a69492a 100644 --- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py @@ -1,3 +1,4 @@ +import bisect import math from ..datacube_mappers import DatacubeMapper @@ -13,11 +14,9 @@ def __init__(self, base_axis, mapped_axes, resolution, md5_hash=None, local_area self._first_axis_vals = self.first_axis_vals() self.compressed_grid_axes = [self._mapped_axes[1]] self.Nside = self._resolution - self._cached_longitudes = {} self.k = int(math.log2(self.Nside)) self.Npix = 12 * self.Nside * self.Nside self.Ncap = (self.Nside * (self.Nside - 1)) << 1 - self._healpix_longitudes = {} if md5_hash is not None: self.md5_hash = md5_hash else: @@ -59,9 +58,7 @@ def second_axis_vals(self, first_val): return values def second_axis_vals_from_idx(self, first_val_idx): - if first_val_idx not in self._healpix_longitudes: - self._healpix_longitudes[first_val_idx] = self.HEALPix_longitudes(first_val_idx) - values = self._healpix_longitudes[first_val_idx] + values = 
self.HEALPix_longitudes(first_val_idx) return values def HEALPix_nj(self, i): @@ -77,19 +74,14 @@ def HEALPix_nj(self, i): return self.HEALPix_nj(ni - 1 - i) def HEALPix_longitudes(self, i): - if i in self._cached_longitudes: - return self._cached_longitudes[i] - else: - Nj = self.HEALPix_nj(i) - step = 360.0 / Nj - start = ( - step / 2.0 - if i < self._resolution or 3 * self._resolution - 1 < i or (i + self._resolution) % 2 - else 0.0 - ) - - longitudes = [start + n * step for n in range(Nj)] - self._cached_longitudes[i] = longitudes + Nj = self.HEALPix_nj(i) + step = 360.0 / Nj + start = ( + step / 2.0 if i < self._resolution or 3 * self._resolution - 1 < i or (i + self._resolution) % 2 else 0.0 + ) + + longitudes = [start + n * step for n in range(Nj)] + return longitudes def map_second_axis(self, first_val, lower, upper): @@ -113,31 +105,48 @@ def axes_idx_to_healpix_idx(self, first_idx, second_idx): return idx for i in range(3 * self._resolution, 4 * self._resolution - 1): if i != first_idx: - idx += 4 * (4 * self._resolution - 1 - i) + idx += 4 * (4 * self._resolution - 1 - i + 1) else: idx += second_idx return idx - def unmap(self, first_val, second_vals): + def find_second_idx(self, first_val, second_val): + tol = 1e-10 + second_axis_vals = self.second_axis_vals(first_val) + second_idx = bisect.bisect_left(second_axis_vals, second_val - tol) + return second_idx + + def unmap_first_val_to_start_line_idx(self, first_val): tol = 1e-8 - first_idx = next( - (i for i, val in enumerate(self._first_axis_vals) if first_val[0] - tol <= val <= first_val[0] + tol), None - ) - if first_idx is None: - return None - second_axis_vals = self.second_axis_vals_from_idx(first_idx) - - return_idxs = [] - for second_val in second_vals: - second_idx = next( - (i for i, val in enumerate(second_axis_vals) if second_val - tol <= val <= second_val + tol), None - ) - if second_idx is None: - return None - healpix_index = self.axes_idx_to_healpix_idx(first_idx, second_idx) - 
nested_healpix_index = self.ring_to_nested(healpix_index) - return_idxs.append(nested_healpix_index) - return return_idxs + first_val = [i for i in self._first_axis_vals if first_val - tol <= i <= first_val + tol][0] + first_idx = self._first_axis_vals.index(first_val) + idx = 0 + for i in range(self._resolution - 1): + if i != first_idx: + idx += 4 * (i + 1) + else: + return idx + for i in range(self._resolution - 1, 3 * self._resolution): + if i != first_idx: + idx += 4 * self._resolution + else: + return idx + for i in range(3 * self._resolution, 4 * self._resolution - 1): + if i != first_idx: + idx += 4 * (4 * self._resolution - 1 - i + 1) + else: + return idx + + def unmap(self, first_val, second_val, unmapped_idx=None): + tol = 1e-8 + first_value = [i for i in self._first_axis_vals if first_val[0] - tol <= i <= first_val[0] + tol][0] + first_idx = self._first_axis_vals.index(first_value) + second_val = [i for i in self.second_axis_vals(first_val) if second_val[0] - tol <= i <= second_val[0] + tol][0] + second_idx = self.second_axis_vals(first_val).index(second_val) + healpix_index = self.axes_idx_to_healpix_idx(first_idx, second_idx) + # TODO: here do conversion of ring to nested healpix representation before returning + healpix_index = self.ring_to_nested(healpix_index) + return healpix_index def div_03(self, a, b): t = 1 if a >= (b << 1) else 0 @@ -213,8 +222,4 @@ def int_sqrt(self, i): # md5 grid hash in form {resolution : hash} -_md5_hash = { - 1024: "cbda19e48d4d7e5e22641154878b9b22", - 512: "47efaa0853e70948a41d5225e7653194", - 128: "f3dfeb7a5bbbdd13a20d10fdb3797c71", -} +_md5_hash = {} diff --git a/polytope_feature/engine/qubed_polytope_intersection.py b/polytope_feature/engine/qubed_polytope_intersection.py index 14f275c59..4f8b1a7a4 100644 --- a/polytope_feature/engine/qubed_polytope_intersection.py +++ b/polytope_feature/engine/qubed_polytope_intersection.py @@ -7,14 +7,6 @@ from ..utility.list_tools import unique from qubed import Qube -from qubed 
import Qube -from qubed.value_types import QEnum -from typing import Iterator -from ...engine.hullslicer import slice -from copy import deepcopy -import pandas as pd -from ..datacube_axis import UnsliceableDatacubeAxis - class QubedSlicing(Engine): def __init__(self): diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index e78197a11..bca3b50ad 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -1,9 +1,10 @@ from qubed import Qube import requests -from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis +from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta import pandas as pd +from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper from polytope_feature.shapes import ConvexPolytope @@ -37,6 +38,7 @@ # ConvexPolytope(["date"], [[pd.Timestamp("20210728")], [pd.Timestamp("20210729")]]) # ] +# TODO: add lat/lon polygon combi_polytopes = [ ConvexPolytope(["param"], [["164"]]), ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), @@ -52,9 +54,11 @@ ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]) + ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) ] +# TODO: add lat and lon axes datacube_axes = {"param": UnsliceableDatacubeAxis(), 
"time": PandasTimedeltaDatacubeAxis(), "resolution": UnsliceableDatacubeAxis(), @@ -69,14 +73,19 @@ "activity": UnsliceableDatacubeAxis(), "dataset": UnsliceableDatacubeAxis(), "class": UnsliceableDatacubeAxis(), - "date": PandasTimestampDatacubeAxis()} + "date": PandasTimestampDatacubeAxis(), + "latitude": FloatDatacubeAxis(), + "longitude": FloatDatacubeAxis()} time_val = pd.Timedelta(hours=0, minutes=0) date_val = pd.Timestamp("20300101T000000") + +# TODO: add grid axis transformation datacube_transformations = { "time": TypeChangeStrToTimedelta("time", time_val), - "date": TypeChangeStrToTimestamp("date", date_val) + "date": TypeChangeStrToTimestamp("date", date_val), + "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) } From 615923778b1de47c54bdeec70dfade63172cb2e1 Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 22 Apr 2025 12:51:57 +0200 Subject: [PATCH 16/28] clean up and start integrating into polytope fe --- polytope_feature/datacube/backends/qubed.py | 21 + .../datacube/backends/test_qubed_slicing.py | 372 ++++-------------- polytope_feature/engine/qubed_slicer.py | 243 +++++++++--- 3 files changed, 286 insertions(+), 350 deletions(-) create mode 100644 polytope_feature/datacube/backends/qubed.py diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py new file mode 100644 index 000000000..f89183cd0 --- /dev/null +++ b/polytope_feature/datacube/backends/qubed.py @@ -0,0 +1,21 @@ +import logging +import operator +from copy import deepcopy +from itertools import product + +from .datacube import Datacube, TensorIndexTree + + +class QubedDatacube(Datacube): + + def __init__( + self, q, datacube_axes, datacube_transformations, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None + ): + self.q = q + # TODO: find datacube_axes and datacube_transformations from options like other datacube backends + self.datacube_axes = datacube_axes + 
self.datacube_transformations = datacube_transformations + + def get(self, requests: TensorIndexTree, context): + # TODO: use GJ to extract data from an fdb + return None diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py index 6ca3b8d93..00919d324 100644 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ b/polytope_feature/datacube/backends/test_qubed_slicing.py @@ -1,197 +1,18 @@ from qubed import Qube from qubed.value_types import QEnum -from typing import Iterator +from qubed.set_operations import union from ...engine.hullslicer import slice -from copy import deepcopy import pandas as pd from ..datacube_axis import UnsliceableDatacubeAxis from ..transformations.datacube_mappers.datacube_mappers import DatacubeMapper -# from ...shapes import ConvexPolytope - -q = Qube.from_dict({ - "class=od": { - "expver=0001": {"param=1/2/3/4/5": {}}, - "expver=0002": {"param=1": {}, "param=2": {}}, - }, - "class=rd": { - "expver=0001": { - "param=1/2/3": {}, - "expver=0001": {"param=1/2/3": {}, } - }, - "expver=0002": {"param=1/2/3/4": {}}, - }, -}).compress() - -# polytopes_list = [ConvexPolytope(), ] - - -# def slice_poly(q: Qube, polytope): -# def _slice_poly(q: Qube, poly): -# for child in q.children: -# # For each child, find the polytopes we should slice on that axis -# right_unsliced_polytopes = [] -# for polytope in q.metadata["unsliced_polytopes"]: -# if q.key in polytope._axes: -# right_unsliced_polytopes.append(polytope) - -# for i, polytope in enumerate(right_unsliced_polytopes): -# lower, upper, slice_axis_idx = polytope.extents(q.key) - - -# def slice(q: Qube, request: dict) -> 'Qube': -# def _slice(q: Qube, r: dict) -> Iterator[Qube]: -# for child in q.children: -# requested_values = r.get(child.key, []) -# found_values = [v for v in requested_values if v in child.values] -# if not found_values: -# continue -# truncated_request = {k: v for k, v in r.items() if k != 
child.key} -# children = list(_slice(child, truncated_request)) - -# # If this node used to have children, i.e was not a leaf node, -# # but as a result of filtering now has no children -# # then filter it out. -# if child.children and not children: -# continue - -# yield Qube.make( -# key=child.key, -# values=QEnum(found_values), -# metadata=child.metadata, -# children=children, -# ) - -# return Qube.root_node(list(_slice(q, request))) - -def slice_new(q: Qube, request: dict) -> 'Qube': - def _slice(q: Qube, r: dict) -> list[Qube]: - result = [] - for child in q.children: - requested_values = r.get(child.key, []) - found_values = [v for v in requested_values if v in child.values] - if not found_values: - continue - truncated_request = {k: v for k, v in r.items() if k != child.key} - children = _slice(child, truncated_request) +from ...shapes import ConvexPolytope, Product +from ...utility.combinatorics import group, tensor_product - # If this node used to have children but now has none due to filtering, skip it. 
- if child.children and not children: - continue - if len(found_values) > 1: - result.extend([Qube.make( - key=child.key, - values=QEnum(val), - metadata=child.metadata, - children=children, - ) for val in found_values]) - else: - result.extend([Qube.make( - key=child.key, - values=QEnum(found_values), - metadata=child.metadata, - children=children - )]) - - return result - - return Qube.root_node(_slice(q, request)) - - -request = { - "expver": ["0001"], - "class": ["rd", "od"], - "param": ["1", "2", "3"], -} - - -# print(q) -# q = slice(q, request) - -# print(q) - -# new_q = Qube.from_dict({ -# "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}}, -# "expver=0001": {"param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, -# }).compress() - -# new_q = Qube.from_dict({ -# "expver=0001": {"param=1/2/3/4/5": {"level=0/1/2": {}}, "param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, -# # "expver=0001": {"param=1": {"level=3/4": {}}, "param=2": {"level=3/4": {}}}, -# }).compress() - -# print("HERE") -# print(new_q) - -# print(new_q["expver", "0001"].children) - - -# def modified_slice(q: Qube, request: dict) -> 'Qube': -# def _slice(q: Qube, r: dict) -> Iterator[Qube]: -# for child in q.children: -# requested_values = r.get(child.key, []) -# found_values = [v for v in requested_values if v in child.values] -# if not found_values: -# continue -# print("HERE") -# print(r.items()) -# for k, v in r.items(): -# if k == "param": -# children = [] -# if "1" in found_values: -# truncated_request = {"level": "2"} -# children.extend(list(_slice(child, truncated_request))) -# if "2" in found_values: -# truncated_request = {"level": ["1", "2", "3"]} -# children.extend(list(_slice(child, truncated_request))) -# if "3" in found_values: -# truncated_request = {"level": "2"} -# children.extend(list(_slice(child, truncated_request))) -# else: -# truncated_request = {k: v for k, v in r.items() if k != child.key} -# children = list(_slice(child, truncated_request)) - -# # 
If this node used to have children, i.e was not a leaf node, -# # but as a result of filtering now has no children -# # then filter it out. -# if child.children and not children: -# continue - -# # for child in children: -# # yield Qube.make( -# # key=child.key, -# # values=QEnum(found_values), -# # metadata=child.metadata, -# # children=list(child), -# # ) -# print("WHAT NODES DID WE CREATE?") -# print(Qube.make( -# key=child.key, -# values=QEnum(found_values), -# metadata=child.metadata, -# children=children, -# )) -# yield Qube.make( -# key=child.key, -# values=QEnum(found_values), -# metadata=child.metadata, -# children=children, -# ) - -# return Qube.root_node(list(_slice(q, request))) - - -# print(modified_slice(new_q, request={ -# "expver": ["0001"], -# "param": ["1", "2", "3"], -# })) - - -def actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': +def _actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': def find_polytopes_on_axis(axis_name, polytopes): polytopes_on_axis = [] - # axis_name = q.key for poly in polytopes: if axis_name in poly._axes: polytopes_on_axis.append(poly) @@ -199,7 +20,7 @@ def find_polytopes_on_axis(axis_name, polytopes): def change_poly_axis_type(axis_name, polytopes, datacube_axes): axis = datacube_axes[axis_name] - # TODO: loop through the polytopes and change each polytopes's values according to axis + # loop through the polytopes and change each polytopes's values according to axis if isinstance(axis, UnsliceableDatacubeAxis): return @@ -237,10 +58,6 @@ def transform_upper_lower(axis_name, lower, upper, datacube_axes): return (lower, upper) - def find_grid_axes(): - # TODO: handle grid axes - pass - def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transformations, second_axis_vals) -> list[Qube]: result = [] polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes) @@ -265,7 +82,6 @@ def 
_slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transf sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope # NOTE: actually the second grid axis will always be compressed - # axis_compressed = _axes_compressed().get(axis_name, True) # if it's not compressed, need to separate into different nodes to append to the tree @@ -278,7 +94,6 @@ def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transf # NOTE this was the last axis so we do not have children... - # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing result.extend([Qube.make( key=axis_name, values=QEnum(new_found_vals), @@ -286,13 +101,12 @@ def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transf children={} )]) return result - # pass def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] if len(q.children) == 0: - # TODO: add "fake" axes and their nodes in order -> what about merged axes?? + # add "fake" axes and their nodes in order -> what about merged axes?? 
mapper_transformation = None for transformation in list(datacube_transformations.values()): if isinstance(transformation, DatacubeMapper): @@ -301,10 +115,10 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # There is no grid mapping pass else: - # TODO: Slice on the two grid axes + # Slice on the two grid axes grid_axes = mapper_transformation._mapped_axes - # TODO: Handle first grid axis + # Handle first grid axis polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes) for poly in polytopes_on_axis: @@ -332,63 +146,30 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ axis_compressed = _axes_compressed().get(grid_axes[0], False) # if it's not compressed, need to separate into different nodes to append to the tree - # if not axis_compressed and len(found_vals) > 1: - if True: - # TODO: if we have gone through all children, then can remove poly from list completely - # polytopes.remove(poly) - for i, found_val in enumerate(found_vals): - # TODO: before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... so we can't just remove here in theory - # child_polytopes = deepcopy(polytopes) - child_polytopes = [p for p in polytopes if p != poly] - if sliced_polys[i]: - child_polytopes.append(sliced_polys[i]) - - second_axis_vals = mapper_transformation.second_axis_vals([found_val]) - - # TODO: get second axis children through slicing - children = _slice_second_grid_axis( - grid_axes[1], child_polytopes, datacube_axes, datacube_transformations, second_axis_vals) - # If this node used to have children but now has none due to filtering, skip it. - if not children: - continue - # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing? 
- if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): - found_val = [str(found_val)] - - # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here - # and instead keep/cache the value to merge with the node from before?? - - qube_node = Qube.make(key=grid_axes[0], - values=QEnum([found_val]), - metadata={}, - children=children) - result.append(qube_node) - # else: - - # child_polytopes = [p for p in polytopes if p != poly] - # child_polytopes.extend( - # [sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) - # # create children - # children = _slice_second_grid_axis( - # grid_axes[1], child_polytopes, datacube_axes, datacube_transformations) - # # If this node used to have children but now has none due to filtering, skip it. - # if not children: - # continue - - # new_found_vals = [] - # for found_val in found_vals: - # if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): - # new_found_vals.append(str(found_val)) - # else: - # new_found_vals.append(found_val) - - # # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing - # result.extend([Qube.make( - # key=grid_axes[0], - # values=QEnum(new_found_vals), - # metadata={}, - # children=children - # )]) + for i, found_val in enumerate(found_vals): + child_polytopes = [p for p in polytopes if p != poly] + if sliced_polys[i]: + child_polytopes.append(sliced_polys[i]) + + second_axis_vals = mapper_transformation.second_axis_vals([found_val]) + + # get second axis children through slicing + children = _slice_second_grid_axis( + grid_axes[1], child_polytopes, datacube_axes, datacube_transformations, second_axis_vals) + # If this node used to have children but now has none due to filtering, skip it. 
+ if not children: + continue + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + found_val = [str(found_val)] + + # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here + # and instead keep/cache the value to merge with the node from before?? + + qube_node = Qube.make(key=grid_axes[0], + values=QEnum([found_val]), + metadata={}, + children=children) + result.append(qube_node) for i, child in enumerate(q.children): # find polytopes which are defined on axis child.key @@ -396,7 +177,6 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # here now first change the values in the polytopes on the axis to reflect the axis type - # for each polytope: for poly in polytopes_on_axis: # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) @@ -425,11 +205,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ axis_compressed = _axes_compressed().get(child.key, False) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: - # TODO: if we have gone through all children, then can remove poly from list completely - # polytopes.remove(poly) for i, found_val in enumerate(found_vals): - # TODO: before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... so we can't just remove here in theory - # child_polytopes = deepcopy(polytopes) child_polytopes = [p for p in polytopes if p != poly] if sliced_polys[i]: child_polytopes.append(sliced_polys[i]) @@ -437,17 +213,12 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # If this node used to have children but now has none due to filtering, skip it. 
if child.children and not children: continue - # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing? if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): found_val = [str(found_val)] # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here # and instead keep/cache the value to merge with the node from before?? - print("HERE LOOK") - print(child.key) - print(found_val) - # print(children) qube_node = Qube.make(key=child.key, values=QEnum(found_val), metadata=child.metadata, @@ -455,8 +226,6 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ result.append(qube_node) else: # if it's compressed, then can add all found values in a single node - # polytopes.remove(poly) - # child_polytopes = deepcopy(polytopes) child_polytopes = [p for p in polytopes if p != poly] child_polytopes.extend([sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) # create children @@ -468,18 +237,10 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ new_found_vals = [] for found_val in found_vals: if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): - print("DIDNT WE GO HERE?") - print(found_val) - print(str(found_val)) - # found_val = [str(found_val)] new_found_vals.append(str(found_val)) else: new_found_vals.append(found_val) - print("WHAT ABOUT HERE?") - print(found_vals) - print(new_found_vals) - # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing result.extend([Qube.make( key=child.key, values=QEnum(new_found_vals), @@ -497,29 +258,48 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) -# TODO: OLD CODE TO MODIFY - # requested_values = r.get(child.key, []) - # found_values = [v for v 
in requested_values if v in child.values] - # if not found_values: - # continue - # truncated_request = {k: v for k, v in r.items() if k != child.key} - # children = _slice(child, truncated_request) - - # # If this node used to have children but now has none due to filtering, skip it. - # if child.children and not children: - # continue - - # if len(found_values) > 1: - # result.extend([Qube.make( - # key=child.key, - # values=QEnum(val), - # metadata=child.metadata, - # children=children, - # ) for val in found_values]) +def actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations): + # for p in polytopes_to_slice: + # if isinstance(p, Product): + # for poly in p.polytope(): + # self._unique_continuous_points(poly, datacube) # else: - # result.extend([Qube.make( - # key=child.key, - # values=QEnum(found_values), - # metadata=child.metadata, - # children=children - # )]) + # self._unique_continuous_points(p, datacube) + + groups, input_axes = group(polytopes_to_slice) + # datacube.validate(input_axes) + # request = TensorIndexTree() + combinations = tensor_product(groups) + + sub_trees = [] + + # NOTE: could optimise here if we know combinations will always be for one request. + # Then we do not need to create a new index tree and merge it to request, but can just + # directly work on request and return it... 
+ + for c in combinations: + # r = TensorIndexTree() + new_c = [] + for combi in c: + if isinstance(combi, list): + new_c.extend(combi) + else: + new_c.append(combi) + # NOTE TODO: here some of the polys in new_c can be a Product shape instead of a ConvexPolytope + # -> need to go through the polytopes in new_c and replace the Products with their sub-ConvexPolytopes + final_polys = [] + for poly in new_c: + if isinstance(poly, Product): + final_polys.extend(poly.polytope()) + else: + final_polys.append(poly) + + # Get the sliced Qube for each combi + r = _actual_slice(q, final_polys, datacube_axes, datacube_transformations) + sub_trees.append(r) + + final_tree = sub_trees[0] + + for sub_tree in sub_trees[1:]: + union(final_tree, sub_tree) + return final_tree diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py index 297b40ab8..28af3f8ca 100644 --- a/polytope_feature/engine/qubed_slicer.py +++ b/polytope_feature/engine/qubed_slicer.py @@ -1,56 +1,33 @@ -import math -from copy import copy -from itertools import chain -from typing import List - -import scipy.spatial - -from ..datacube.backends.datacube import Datacube +from qubed import Qube +from qubed.value_types import QEnum +from qubed.set_operations import union +from .hullslicer import slice +from .engine import Engine +import pandas as pd from ..datacube.datacube_axis import UnsliceableDatacubeAxis -from ..datacube.tensor_index_tree import TensorIndexTree +from ..datacube.transformations.datacube_mappers.datacube_mappers import DatacubeMapper from ..shapes import ConvexPolytope, Product from ..utility.combinatorics import group, tensor_product -from ..utility.exceptions import UnsliceableShapeError -from ..utility.geometry import lerp -from ..utility.list_tools import argmax, argmin, unique -from .engine import Engine +from typing import List -from qubed import Qube -from qubed.value_types import QEnum -from typing import Iterator -from ...engine.hullslicer import slice 
-from copy import deepcopy -import pandas as pd -from ..datacube_axis import UnsliceableDatacubeAxis +from ..datacube.backends.datacube import Datacube + +# TODO: create a class for qubed slicer +# TODO: turn actual_slice into extract, which only takes in a Datacube instead of a Qube + datacube_axes + datacube_transformations + +# TODO: create a Qube datacube, which takes in a Qube and exposes the Qube + datacube_axes + datacube_transformations class QubedSlicer(Engine): def __init__(self): self.ax_is_unsliceable = {} - self.axis_values_between = {} - self.has_value = {} - self.sliced_polytopes = {} - self.remapped_vals = {} self.compressed_axes = [] - # TODO: assert that the associated datacube is an FDB one - - # TODO: change functions to reuse same methods as other slicers - - # TODO: get the transformations + datacube axes from the datacube now - - # TODO: separate the extract into the combinations and then do the slicing for each combination + def _actual_slice(self, q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': - def find_polytope_combinations(self): - pass - - def actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': - # TODO: when there is a qubed leaf, make sure to add/consider the mapper grid axes too - - def find_polytopes_on_axis(q: Qube, polytopes): + def find_polytopes_on_axis(axis_name, polytopes): polytopes_on_axis = [] - axis_name = q.key for poly in polytopes: if axis_name in poly._axes: polytopes_on_axis.append(poly) @@ -58,7 +35,7 @@ def find_polytopes_on_axis(q: Qube, polytopes): def change_poly_axis_type(axis_name, polytopes, datacube_axes): axis = datacube_axes[axis_name] - # TODO: loop through the polytopes and change each polytopes's values according to axis + # loop through the polytopes and change each polytopes's values according to axis if isinstance(axis, UnsliceableDatacubeAxis): return @@ -70,8 +47,8 @@ def change_poly_axis_type(axis_name, polytopes, 
datacube_axes): for j, val in enumerate(poly.points): poly.points[j][i] = axis.to_float(axis.parse(poly.points[j][i])) - def _axes_compressed(): - return {} + # def _axes_compressed(): + # return {} def change_datacube_val_types(child: Qube, datacube_transformations): axis_name = child.key @@ -96,18 +73,130 @@ def transform_upper_lower(axis_name, lower, upper, datacube_axes): return (lower, upper) + def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transformations, second_axis_vals) -> list[Qube]: + result = [] + polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes) + + for poly in polytopes_on_axis: + lower, upper, slice_axis_idx = poly.extents(axis_name) + + new_lower, new_upper = transform_upper_lower(axis_name, lower, upper, datacube_axes) + found_vals = [v for v in second_axis_vals if new_lower <= v <= new_upper] + + if len(found_vals) == 0: + continue + + # slice polytope along each value on child and keep resulting polytopes in memory + sliced_polys = [] + for val in found_vals: + ax = datacube_axes[axis_name] + if not isinstance(ax, UnsliceableDatacubeAxis): + fval = ax.to_float(val) + # slice polytope along the value and add sliced polytope to list of polytopes in memory + sliced_poly = slice(poly, axis_name, fval, slice_axis_idx) + sliced_polys.append(sliced_poly) + # decide if axis should be compressed or not according to polytope + # NOTE: actually the second grid axis will always be compressed + + # if it's not compressed, need to separate into different nodes to append to the tree + + new_found_vals = [] + for found_val in found_vals: + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + new_found_vals.append(str(found_val)) + else: + new_found_vals.append(found_val) + + # NOTE this was the last axis so we do not have children... 
+ + result.extend([Qube.make( + key=axis_name, + values=QEnum(new_found_vals), + metadata={}, + children={} + )]) + return result + def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: result = [] - for child in q.children: + + if len(q.children) == 0: + # add "fake" axes and their nodes in order -> what about merged axes?? + mapper_transformation = None + for transformation in list(datacube_transformations.values()): + if isinstance(transformation, DatacubeMapper): + mapper_transformation = transformation + if not mapper_transformation: + # There is no grid mapping + pass + else: + # Slice on the two grid axes + grid_axes = mapper_transformation._mapped_axes + + # Handle first grid axis + polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes) + + for poly in polytopes_on_axis: + lower, upper, slice_axis_idx = poly.extents(grid_axes[0]) + + first_ax_vals = mapper_transformation.first_axis_vals() + + new_lower, new_upper = transform_upper_lower(grid_axes[0], lower, upper, datacube_axes) + found_vals = [v for v in first_ax_vals if new_lower <= v <= new_upper] + + if len(found_vals) == 0: + continue + + # slice polytope along each value on child and keep resulting polytopes in memory + sliced_polys = [] + for val in found_vals: + ax = datacube_axes[grid_axes[0]] + if not isinstance(ax, UnsliceableDatacubeAxis): + fval = ax.to_float(val) + # slice polytope along the value and add sliced polytope to list of polytopes in memory + sliced_poly = slice(poly, grid_axes[0], fval, slice_axis_idx) + sliced_polys.append(sliced_poly) + # decide if axis should be compressed or not according to polytope + # NOTE: actually the first grid axis will never be compressed + # axis_compressed = self.compressed_axes.get(grid_axes[0], False) + axis_compressed = (grid_axes[0] in self.compressed_axes) + + # if it's not compressed, need to separate into different nodes to append to the tree + for i, found_val in enumerate(found_vals): + 
child_polytopes = [p for p in polytopes if p != poly] + if sliced_polys[i]: + child_polytopes.append(sliced_polys[i]) + + second_axis_vals = mapper_transformation.second_axis_vals([found_val]) + + # get second axis children through slicing + children = _slice_second_grid_axis( + grid_axes[1], child_polytopes, datacube_axes, datacube_transformations, second_axis_vals) + # If this node used to have children but now has none due to filtering, skip it. + if not children: + continue + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + found_val = [str(found_val)] + + # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here + # and instead keep/cache the value to merge with the node from before?? + + qube_node = Qube.make(key=grid_axes[0], + values=QEnum([found_val]), + metadata={}, + children=children) + result.append(qube_node) + + for i, child in enumerate(q.children): # find polytopes which are defined on axis child.key - polytopes_on_axis = find_polytopes_on_axis(child, polytopes) + polytopes_on_axis = find_polytopes_on_axis(child.key, polytopes) # here now first change the values in the polytopes on the axis to reflect the axis type - # for each polytope: for poly in polytopes_on_axis: # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) + # find values on child that are within extents # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation modified_vals = change_datacube_val_types(child, datacube_transformations) @@ -128,24 +217,24 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # slice polytope along the value and add sliced polytope to list of polytopes in memory sliced_poly = slice(poly, child.key, fval, slice_axis_idx) sliced_polys.append(sliced_poly) - # decide if axis should be compressed or not according to polytope axis_compressed = 
_axes_compressed().get(child.key, False) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: - polytopes.remove(poly) for i, found_val in enumerate(found_vals): - # TODO: before removing polytope here actually, we should be careful that all the values in the polytope are on this branch... so we can't just remove here in theory - child_polytopes = deepcopy(polytopes) + child_polytopes = [p for p in polytopes if p != poly] if sliced_polys[i]: child_polytopes.append(sliced_polys[i]) children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: continue - # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing? if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): found_val = [str(found_val)] + + # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here + # and instead keep/cache the value to merge with the node from before?? 
+ qube_node = Qube.make(key=child.key, values=QEnum(found_val), metadata=child.metadata, @@ -153,8 +242,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ result.append(qube_node) else: # if it's compressed, then can add all found values in a single node - polytopes.remove(poly) - child_polytopes = deepcopy(polytopes) + child_polytopes = [p for p in polytopes if p != poly] child_polytopes.extend( [sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) # create children @@ -163,10 +251,16 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ if child.children and not children: continue - # TODO: add the child_polytopes to the child.metadata/ ie change child.metadata here before passing + new_found_vals = [] + for found_val in found_vals: + if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): + new_found_vals.append(str(found_val)) + else: + new_found_vals.append(found_val) + result.extend([Qube.make( key=child.key, - values=QEnum(found_vals), + values=QEnum(new_found_vals), metadata=child.metadata, children=children )]) @@ -179,3 +273,44 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ change_poly_axis_type(axis, [polytope], datacube_axes) return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) + + def actual_slice(self, q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations): + + groups, input_axes = group(polytopes_to_slice) + combinations = tensor_product(groups) + + sub_trees = [] + + # NOTE: could optimise here if we know combinations will always be for one request. + # Then we do not need to create a new index tree and merge it to request, but can just + # directly work on request and return it... 
+ + for c in combinations: + new_c = [] + for combi in c: + if isinstance(combi, list): + new_c.extend(combi) + else: + new_c.append(combi) + final_polys = [] + for poly in new_c: + if isinstance(poly, Product): + final_polys.extend(poly.polytope()) + else: + final_polys.append(poly) + + # Get the sliced Qube for each combi + r = self._actual_slice(q, final_polys, datacube_axes, datacube_transformations) + sub_trees.append(r) + + final_tree = sub_trees[0] + + for sub_tree in sub_trees[1:]: + union(final_tree, sub_tree) + return final_tree + + def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): + self.find_compressed_axes(datacube, polytopes) + assert isinstance(datacube, QubedDatacube) + tree = self.actual_slice(datacube.q, polytopes, datacube.datacube_axes, datacube.datacube_transformations) + return tree From 290bc825b0b71bb0d2866b9d884fe992527a5d83 Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 22 Apr 2025 13:18:15 +0200 Subject: [PATCH 17/28] integrate qubed more into a backend + slicer --- .../datacube/backends/datacube.py | 2 + polytope_feature/datacube/backends/fdb.py | 1 + polytope_feature/datacube/backends/qubed.py | 5 +- polytope_feature/engine/qubed_slicer.py | 27 +++- tests/test_qubed_extraction_engine.py | 145 ++++++++++++++++++ 5 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 tests/test_qubed_extraction_engine.py diff --git a/polytope_feature/datacube/backends/datacube.py b/polytope_feature/datacube/backends/datacube.py index e8e41db0b..43f9d0456 100644 --- a/polytope_feature/datacube/backends/datacube.py +++ b/polytope_feature/datacube/backends/datacube.py @@ -166,6 +166,8 @@ def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alt datacube, config, axis_options, compressed_axes_options, alternative_axes, context ) return fdbdatacube + if type(datacube).__name__ == "QubedDatacube": + return datacube def check_branching_axes(self, request): pass diff --git 
a/polytope_feature/datacube/backends/fdb.py b/polytope_feature/datacube/backends/fdb.py index 64304e379..af027cd04 100644 --- a/polytope_feature/datacube/backends/fdb.py +++ b/polytope_feature/datacube/backends/fdb.py @@ -18,6 +18,7 @@ def __init__( context = {} super().__init__(axis_options, compressed_axes_options) + print(axis_options) logging.info("Created an FDB datacube with options: " + str(axis_options)) diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index f89183cd0..5dfa43b68 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -15,7 +15,10 @@ def __init__( # TODO: find datacube_axes and datacube_transformations from options like other datacube backends self.datacube_axes = datacube_axes self.datacube_transformations = datacube_transformations + # TODO: find compressed_axes list + self.compressed_axes = [] + self._axes = datacube_axes def get(self, requests: TensorIndexTree, context): # TODO: use GJ to extract data from an fdb - return None + return requests diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py index 28af3f8ca..fe42b01a3 100644 --- a/polytope_feature/engine/qubed_slicer.py +++ b/polytope_feature/engine/qubed_slicer.py @@ -3,6 +3,7 @@ from qubed.value_types import QEnum from qubed.set_operations import union from .hullslicer import slice +from ..datacube.backends.qubed import QubedDatacube from .engine import Engine import pandas as pd from ..datacube.datacube_axis import UnsliceableDatacubeAxis @@ -218,7 +219,8 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ sliced_poly = slice(poly, child.key, fval, slice_axis_idx) sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope - axis_compressed = _axes_compressed().get(child.key, False) + # axis_compressed = self.compressed_axes.get(child.key, False) + 
axis_compressed = (child.key in self.compressed_axes) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: for i, found_val in enumerate(found_vals): @@ -309,6 +311,29 @@ def actual_slice(self, q: Qube, polytopes_to_slice, datacube_axes, datacube_tran union(final_tree, sub_tree) return final_tree + def find_compressed_axes(self, datacube, polytopes): + # First determine compressable axes from input polytopes + compressable_axes = [] + for polytope in polytopes: + if polytope.is_orthogonal: + for ax in polytope.axes(): + compressable_axes.append(ax) + # Cross check this list with list of compressable axis from datacube + # (should not include any merged or coupled axes) + for compressed_axis in compressable_axes: + if compressed_axis in datacube.compressed_axes: + self.compressed_axes.append(compressed_axis) + # add the last axis of the grid always (longitude) as a compressed axis + k, last_value = _, datacube.axes[k] = datacube.axes.popitem() + self.compressed_axes.append(k) + + def remove_compressed_axis_in_union(self, polytopes): + for p in polytopes: + if p.is_in_union: + for axis in p.axes(): + if axis == self.compressed_axes[-1]: + self.compressed_axes.remove(axis) + def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): self.find_compressed_axes(datacube, polytopes) assert isinstance(datacube, QubedDatacube) diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py new file mode 100644 index 000000000..a6ebc41da --- /dev/null +++ b/tests/test_qubed_extraction_engine.py @@ -0,0 +1,145 @@ +from polytope_feature.shapes import Box, Select, Span +from polytope_feature.polytope import Polytope, Request +from polytope_feature.engine.qubed_slicer import QubedSlicer +from polytope_feature.datacube.backends.qubed import QubedDatacube +import pytest +from qubed import Qube +import requests +from polytope_feature.datacube.datacube_axis import 
PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis +from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta +import pandas as pd +from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper + +from polytope_feature.shapes import ConvexPolytope + + +fdb_tree = Qube.from_json(requests.get( + "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) + + +combi_polytopes = [ + ConvexPolytope(["param"], [["164"]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), + ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["model"], [['ifs-nemo']]), + ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], ["1"]), + ConvexPolytope(["expver"], [['0001']]), + ConvexPolytope(["experiment"], [['ssp3-7.0']]), + ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["activity"], [["scenariomip"]]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) +] + +# TODO: add lat and lon axes +datacube_axes = {"param": UnsliceableDatacubeAxis(), + "time": PandasTimedeltaDatacubeAxis(), + "resolution": UnsliceableDatacubeAxis(), + "type": UnsliceableDatacubeAxis(), + "model": UnsliceableDatacubeAxis(), + "stream": UnsliceableDatacubeAxis(), + "realization": UnsliceableDatacubeAxis(), + "expver": UnsliceableDatacubeAxis(), + "experiment": UnsliceableDatacubeAxis(), + "generation": UnsliceableDatacubeAxis(), + 
"levtype": UnsliceableDatacubeAxis(), + "activity": UnsliceableDatacubeAxis(), + "dataset": UnsliceableDatacubeAxis(), + "class": UnsliceableDatacubeAxis(), + "date": PandasTimestampDatacubeAxis(), + "latitude": FloatDatacubeAxis(), + "longitude": FloatDatacubeAxis()} + +time_val = pd.Timedelta(hours=0, minutes=0) +date_val = pd.Timestamp("20300101T000000") + + +# TODO: add grid axis transformation +datacube_transformations = { + "time": TypeChangeStrToTimedelta("time", time_val), + "date": TypeChangeStrToTimestamp("date", date_val), + "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) +} + + +options = { + "axis_config": [ + {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, + { + "axis_name": "date", + "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], + }, + { + "axis_name": "values", + "transformations": [ + {"name": "mapper", "type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]} + ], + }, + {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, + {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, + ], + "compressed_axes_config": [ + "longitude", + "latitude", + "levtype", + "step", + "date", + "domain", + "expver", + "param", + "class", + "stream", + "type", + ], + "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"}, +} + +# request = Request( +# Select("step", [0]), +# Select("levtype", ["sfc"]), +# Select("date", [pd.Timestamp("20230625T120000")]), +# Select("domain", ["g"]), +# Select("expver", ["0001"]), +# Select("param", ["167"]), +# Select("class", ["od"]), +# Select("stream", ["oper"]), +# Select("type", ["an"]), +# Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), +# ) + +request = Request(ConvexPolytope(["param"], [["164"]]), + 
ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), + ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["model"], [['ifs-nemo']]), + ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], ["1"]), + ConvexPolytope(["expver"], [['0001']]), + ConvexPolytope(["experiment"], [['ssp3-7.0']]), + ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["activity"], [["scenariomip"]]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])) + +qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) +slicer = QubedSlicer() +self_API = Polytope( + datacube=qubeddatacube, + engine=slicer, + options=options, +) +result = self_API.retrieve(request) + +print(result) + +# sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) From 21519f5d94787cce0237c3d3c9a7a061f1f16521 Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 22 Apr 2025 17:14:12 +0200 Subject: [PATCH 18/28] WIP extract data for qubed datacube --- .../datacube/backends/datacube.py | 8 +- polytope_feature/datacube/backends/qubed.py | 344 +++++++++++++++++- polytope_feature/datacube/datacube_axis.py | 1 + .../mapper_types/healpix_nested.py | 2 +- tests/test_qubed_extraction_engine.py | 14 +- 5 files changed, 356 insertions(+), 13 deletions(-) diff --git a/polytope_feature/datacube/backends/datacube.py b/polytope_feature/datacube/backends/datacube.py index 43f9d0456..22029502b 100644 --- a/polytope_feature/datacube/backends/datacube.py +++ b/polytope_feature/datacube/backends/datacube.py @@ -167,7 +167,13 @@ def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alt ) return fdbdatacube 
if type(datacube).__name__ == "QubedDatacube": - return datacube + from .qubed import QubedDatacube + # TODO: here we create the qubeddatacube twice..., which we do not want + print("WHAT ARE THE AXIS OPTIONS HERE??") + print(axis_options) + qubed_datacube = QubedDatacube(datacube.q, datacube.datacube_axes, datacube.datacube_transformations, + config, axis_options, compressed_axes_options, alternative_axes, context) + return qubed_datacube def check_branching_axes(self, request): pass diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index 5dfa43b68..4e9f44900 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -2,6 +2,9 @@ import operator from copy import deepcopy from itertools import product +from ...utility.exceptions import BadGridError, BadRequestError, GribJumpNoIndexError +from ...utility.geometry import nearest_pt +import pygribjump as pygj from .datacube import Datacube, TensorIndexTree @@ -11,14 +14,347 @@ class QubedDatacube(Datacube): def __init__( self, q, datacube_axes, datacube_transformations, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None ): + if config is None: + config = {} + if axis_options is None: + axis_options = {} + self.q = q # TODO: find datacube_axes and datacube_transformations from options like other datacube backends self.datacube_axes = datacube_axes self.datacube_transformations = datacube_transformations # TODO: find compressed_axes list self.compressed_axes = [] - self._axes = datacube_axes + # self._axes = datacube_axes + # TODO: should the gj object be passed in instead? + self.gj = pygj.GribJump() + + # TODO: this doesn't fill the axes as wanted + super().__init__(axis_options, compressed_axes_options) + + # self._axes = datacube_axes + # TODO: where do these come from and are they right? + self.unwanted_path = {} + + # TODO: is this right? 
+ + self.axis_options = axis_options + # Find values in the level 3 FDB datacube + + self.fdb_coordinates = {} + print("WHAT ARE THE AXIS OPTIONS") + print(axis_options) + + # TODO: we instead now have a list of axes with the actual axes types... + + for axis_name in datacube_axes: + axis = datacube_axes[axis_name] + self.fdb_coordinates[axis_name] = [axis.type] + + self.fdb_coordinates["values"] = [] + for name, values in self.fdb_coordinates.items(): + options = None + for opt in self.axis_options: + if opt.axis_name == name: + options = opt + + self._check_and_add_axes(options, name, values) + self.treated_axes.append(name) + self.complete_axes.append(name) + + # add other options to axis which were just created above like "lat" for the mapper transformations for eg + for name in self._axes: + if name not in self.treated_axes: + options = None + for opt in self.axis_options: + if opt.axis_name == name: + options = opt + + val = self._axes[name].type + self._check_and_add_axes(options, name, val) + + # def get(self, requests: TensorIndexTree, context): + # # TODO: use GJ to extract data from an fdb + # return requests + print("WHAT's INSIDE OF FDB?") + print(self.gj.axes({"class": "d1", "model": "ifs-nemo"})) + + def get(self, requests, context=None): + if context is None: + context = {} + if len(requests.children) == 0: + return requests + fdb_requests = [] + fdb_requests_decoding_info = [] + self.get_fdb_requests(requests, fdb_requests, fdb_requests_decoding_info) + + # here, loop through the fdb requests and request from gj and directly add to the nodes + complete_list_complete_uncompressed_requests = [] + complete_fdb_decoding_info = [] + for j, compressed_request in enumerate(fdb_requests): + uncompressed_request = {} + + # Need to determine the possible decompressed requests + + # find the possible combinations of compressed indices + interm_branch_tuple_values = [] + for key in compressed_request[0].keys(): + 
interm_branch_tuple_values.append(compressed_request[0][key]) + request_combis = product(*interm_branch_tuple_values) + + # Need to extract the possible requests and add them to the right nodes + for combi in request_combis: + uncompressed_request = {} + for i, key in enumerate(compressed_request[0].keys()): + uncompressed_request[key] = combi[i] + complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash) + complete_list_complete_uncompressed_requests.append(complete_uncompressed_request) + complete_fdb_decoding_info.append(fdb_requests_decoding_info[j]) + + if logging.root.level <= logging.DEBUG: + printed_list_to_gj = complete_list_complete_uncompressed_requests[::1000] + logging.debug("The requests we give GribJump are: %s", printed_list_to_gj) + logging.info("Requests given to GribJump extract for %s", context) + try: + print("HER ELOOOK NOW WHAT WE GIVE TO GJ") + print(complete_list_complete_uncompressed_requests) + output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) + except Exception as e: + if "BadValue: Grid hash mismatch" in str(e): + logging.info("Error is: %s", e) + raise BadGridError() + if "Missing JumpInfo" in str(e): + logging.info("Error is: %s", e) + raise GribJumpNoIndexError() + else: + raise e + + logging.info("Requests extracted from GribJump for %s", context) + if logging.root.level <= logging.DEBUG: + printed_output_values = output_values[::1000] + logging.debug("GribJump outputs: %s", printed_output_values) + self.assign_fdb_output_to_nodes(output_values, complete_fdb_decoding_info) + + def get_fdb_requests( + self, + requests, + fdb_requests=[], + fdb_requests_decoding_info=[], + leaf_path=None, + ): + if leaf_path is None: + leaf_path = {} + + # First when request node is root, go to its children + if requests.key == "root": + logging.debug("Looking for data for the tree") + + for c in requests.children: + self.get_fdb_requests(c, fdb_requests, 
fdb_requests_decoding_info) + # If request node has no children, we have a leaf so need to assign fdb values to it + else: + key_value_path = {requests.key: requests.values} + # ax = requests.axis + ax = self._axes[requests.key] + (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( + key_value_path, leaf_path, self.unwanted_path + ) + leaf_path.update(key_value_path) + if len(requests.children[0].children[0].children) == 0: + # find the fdb_requests and associated nodes to which to add results + (path, current_start_idxs, fdb_node_ranges, lat_length) = self.get_2nd_last_values(requests, leaf_path) + ( + original_indices, + sorted_request_ranges, + fdb_node_ranges, + ) = self.sort_fdb_request_ranges(current_start_idxs, lat_length, fdb_node_ranges) + fdb_requests.append((path, sorted_request_ranges)) + fdb_requests_decoding_info.append((original_indices, fdb_node_ranges)) + + # Otherwise remap the path for this key and iterate again over children + else: + for c in requests.children: + self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path) + + def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs): + # seen_indices = set() + # for i, idxs_list in enumerate(current_start_idxs): + # for k, sub_lat_idxs in enumerate(idxs_list): + # actual_fdb_node = fdb_node_ranges[i][k] + # original_fdb_node_range_vals = [] + # new_current_start_idx = [] + # for j, idx in enumerate(sub_lat_idxs): + # if idx not in seen_indices: + # # NOTE: need to remove it from the values in the corresponding tree node + # # NOTE: need to read just the range we give to gj + # original_fdb_node_range_vals.append(list(actual_fdb_node[0].values)[j]) + # seen_indices.add(idx) + # new_current_start_idx.append(idx) + # if original_fdb_node_range_vals != []: + # actual_fdb_node[0].values = tuple(original_fdb_node_range_vals) + # else: + # # there are no values on this node anymore so can remove it + # actual_fdb_node[0].remove_branch() + # 
if len(new_current_start_idx) == 0: + # current_start_idxs[i].pop(k) + # else: + # current_start_idxs[i][k] = new_current_start_idx + return (fdb_node_ranges, current_start_idxs) + + def nearest_lat_lon_search(self, requests): + if len(self.nearest_search) != 0: + first_ax_name = requests.children[0].key + second_ax_name = requests.children[0].children[0].key + + axes_in_nearest_search = [ + first_ax_name not in self.nearest_search.keys(), + second_ax_name not in self.nearest_search.keys(), + ] + + if all(not item for item in axes_in_nearest_search): + raise Exception("nearest point search axes are wrong") + + second_ax = self._axes[requests.children[0].children[0].key] + + nearest_pts = self.nearest_search.get((first_ax_name, second_ax_name), None) + if nearest_pts is None: + nearest_pts = self.nearest_search.get((second_ax_name, first_ax_name), None) + for i, pt in enumerate(nearest_pts): + nearest_pts[i] = [pt[1], pt[0]] + + transformed_nearest_pts = [] + for point in nearest_pts: + transformed_nearest_pts.append([point[0], second_ax._remap_val_to_axis_range(point[1])]) + + found_latlon_pts = [] + for lat_child in requests.children: + for lon_child in lat_child.children: + found_latlon_pts.append([lat_child.values, lon_child.values]) + + # now find the nearest lat lon to the points requested + nearest_latlons = [] + for pt in transformed_nearest_pts: + nearest_latlon = nearest_pt(found_latlon_pts, pt) + nearest_latlons.append(nearest_latlon) + + # need to remove the branches that do not fit + lat_children_values = [child.values for child in requests.children] + for i in range(len(lat_children_values)): + lat_child_val = lat_children_values[i] + lat_child = [child for child in requests.children if child.values == lat_child_val][0] + if lat_child.values not in [(latlon[0],) for latlon in nearest_latlons]: + lat_child.remove_branch() + else: + possible_lons = [latlon[1] for latlon in nearest_latlons if (latlon[0],) == lat_child.values] + lon_children_values = 
[child.values for child in lat_child.children] + for j in range(len(lon_children_values)): + lon_child_val = lon_children_values[j] + lon_child = [child for child in lat_child.children if child.values == lon_child_val][0] + for value in lon_child.values: + if value not in possible_lons: + lon_child.remove_compressed_branch(value) + + def get_2nd_last_values(self, requests, leaf_path=None): + if leaf_path is None: + leaf_path = {} + # In this function, we recursively loop over the last two layers of the tree and store the indices of the + # request ranges in those layers + self.nearest_lat_lon_search(requests) + + lat_length = len(requests.children) + current_start_idxs = [False] * lat_length + fdb_node_ranges = [False] * lat_length + for i in range(len(requests.children)): + lat_child = requests.children[i] + lon_length = len(lat_child.children) + current_start_idxs[i] = [None] * lon_length + fdb_node_ranges[i] = [[TensorIndexTree.root for y in range(lon_length)] for x in range(lon_length)] + current_start_idx = deepcopy(current_start_idxs[i]) + fdb_range_nodes = deepcopy(fdb_node_ranges[i]) + key_value_path = {lat_child.key: list(lat_child.values)} + ax = self._axes[lat_child.key] + (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( + key_value_path, leaf_path, self.unwanted_path + ) + leaf_path.update(key_value_path) + (current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf( + lat_child, leaf_path, current_start_idx, fdb_range_nodes + ) + + leaf_path_copy = deepcopy(leaf_path) + leaf_path_copy.pop("values", None) + return (leaf_path_copy, current_start_idxs, fdb_node_ranges, lat_length) + + def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range_n): + current_idx = [[] for i in range(len(requests.children))] + fdb_range_n = [[] for i in range(len(requests.children))] + for i, c in enumerate(requests.children): + # now c are the leaves of the initial tree + key_value_path = {c.key: list(c.values)} + 
ax = self._axes[c.key] + (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( + key_value_path, leaf_path, self.unwanted_path + ) + # TODO: change this to accommodate non consecutive indexes being compressed too + current_idx[i].extend(key_value_path["values"]) + fdb_range_n[i].append(c) + return (current_idx, fdb_range_n) + + def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): + for k in range(len(output_values)): + request_output_values = output_values[k] + ( + original_indices, + fdb_node_ranges, + ) = fdb_requests_decoding_info[k] + sorted_fdb_range_nodes = [fdb_node_ranges[i] for i in original_indices] + for i in range(len(sorted_fdb_range_nodes)): + n = sorted_fdb_range_nodes[i][0] + if len(request_output_values[0]) == 0: + # If we are here, no data was found for this path in the fdb + none_array = [None] * len(n.values) + n.result.extend(none_array) + else: + interm_request_output_values = request_output_values[0][i][0] + n.result.extend(interm_request_output_values) - def get(self, requests: TensorIndexTree, context): - # TODO: use GJ to extract data from an fdb - return requests + def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): + (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( + fdb_node_ranges, current_start_idx + ) + interm_request_ranges = [] + # TODO: modify the start indexes to have as many arrays as the request ranges + new_fdb_node_ranges = [] + for i in range(lat_length): + interm_fdb_nodes = fdb_node_ranges[i] + old_interm_start_idx = current_start_idx[i] + for j in range(len(old_interm_start_idx)): + # TODO: if we sorted the cyclic values in increasing order on the tree too, + # then we wouldn't have to sort here? + sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1]) + original_indices_idx, interm_start_idx = zip(*sorted_list) + # TODO: !!!!!!! 
should really sort the values here again + # for interm_fdb_nodes_obj in interm_fdb_nodes[j]: + # interm_fdb_nodes_obj.values = tuple([list(interm_fdb_nodes_obj.values)[k] + # for k in original_indices_idx]) + if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx): + current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1) + interm_request_ranges.append(current_request_ranges) + new_fdb_node_ranges.append(interm_fdb_nodes[j]) + else: + jumps = list(map(operator.sub, interm_start_idx[1:], interm_start_idx[:-1])) + last_idx = 0 + for k, jump in enumerate(jumps): + if jump > 1: + current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[k] + 1) + new_fdb_node_ranges.append(interm_fdb_nodes[j]) + last_idx = k + 1 + interm_request_ranges.append(current_request_ranges) + if k == len(interm_start_idx) - 2: + current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[-1] + 1) + interm_request_ranges.append(current_request_ranges) + new_fdb_node_ranges.append(interm_fdb_nodes[j]) + request_ranges_with_idx = list(enumerate(interm_request_ranges)) + sorted_list = sorted(request_ranges_with_idx, key=lambda x: x[1][0]) + original_indices, sorted_request_ranges = zip(*sorted_list) + return (original_indices, sorted_request_ranges, new_fdb_node_ranges) diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index fbf8a70cb..8840a8a2f 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -300,6 +300,7 @@ def __init__(self): self.range = None self.transformations = [] self.can_round = False + self.type = "" def parse(self, value: Any) -> Any: return value diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py index 36a69492a..416976e59 100644 --- 
a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py @@ -146,7 +146,7 @@ def unmap(self, first_val, second_val, unmapped_idx=None): healpix_index = self.axes_idx_to_healpix_idx(first_idx, second_idx) # TODO: here do conversion of ring to nested healpix representation before returning healpix_index = self.ring_to_nested(healpix_index) - return healpix_index + return [healpix_index] def div_03(self, a, b): t = 1 if a >= (b << 1) else 0 diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index a6ebc41da..0c5a47e6a 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -72,14 +72,14 @@ "axis_config": [ {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, - { - "axis_name": "date", - "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], - }, + # { + # "axis_name": "date", + # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], + # }, { "axis_name": "values", "transformations": [ - {"name": "mapper", "type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]} + {"name": "mapper", "type": "healpix_nested", "resolution": 1024, "axes": ["latitude", "longitude"]} ], }, {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, @@ -115,7 +115,7 @@ # ) request = Request(ConvexPolytope(["param"], [["164"]]), - ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), ConvexPolytope(["resolution"], [["high"]]), ConvexPolytope(["type"], [["fc"]]), ConvexPolytope(["model"], [['ifs-nemo']]), @@ -128,7 
+128,7 @@ ConvexPolytope(["activity"], [["scenariomip"]]), ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])) qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) From 636f32ab5317a41098a44482c844829ccfc2fbc6 Mon Sep 17 00:00:00 2001 From: mathleur Date: Wed, 23 Apr 2025 12:22:53 +0200 Subject: [PATCH 19/28] use new gj iterator for qubed backend WIP --- polytope_feature/datacube/backends/qubed.py | 28 ++++++++++++++++----- tests/test_qubed_extraction_engine.py | 2 ++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index 4e9f44900..9a1518813 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -77,7 +77,7 @@ def __init__( # # TODO: use GJ to extract data from an fdb # return requests print("WHAT's INSIDE OF FDB?") - print(self.gj.axes({"class": "d1", "model": "ifs-nemo"})) + print(self.gj.axes({"class": "d1", "model": "ifs-nemo", "resolution": "high"})) def get(self, requests, context=None): if context is None: @@ -107,6 +107,8 @@ def get(self, requests, context=None): uncompressed_request = {} for i, key in enumerate(compressed_request[0].keys()): uncompressed_request[key] = combi[i] + # TODO: get the hash from somewhere... 
+ self.grid_md5_hash = "cbda19e48d4d7e5e22641154878b9b22" complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash) complete_list_complete_uncompressed_requests.append(complete_uncompressed_request) complete_fdb_decoding_info.append(fdb_requests_decoding_info[j]) @@ -159,6 +161,20 @@ def get_fdb_requests( (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) + # print("HERE NOW LOOK") + # print(requests.key) + # print(key_value_path) + # TODO: change to use the datacube trasnformations instead... + if requests.key == "time": + new_vals = [] + for val in key_value_path[requests.key]: + new_vals.append(val[7:9]+val[10:12]) + key_value_path[requests.key] = new_vals + if requests.key == "date": + new_vals = [] + for val in key_value_path[requests.key]: + new_vals.append(val[:4] + val[5:7] + val[8:10]) + key_value_path[requests.key] = new_vals leaf_path.update(key_value_path) if len(requests.children[0].children[0].children) == 0: # find the fdb_requests and associated nodes to which to add results @@ -301,8 +317,8 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range return (current_idx, fdb_range_n) def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): - for k in range(len(output_values)): - request_output_values = output_values[k] + for k, request_output_values in enumerate(output_values): + # request_output_values = output_values[k] ( original_indices, fdb_node_ranges, @@ -310,13 +326,13 @@ def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): sorted_fdb_range_nodes = [fdb_node_ranges[i] for i in original_indices] for i in range(len(sorted_fdb_range_nodes)): n = sorted_fdb_range_nodes[i][0] - if len(request_output_values[0]) == 0: + if len(request_output_values.values) == 0: # If we are here, no data was found for this path in the fdb none_array = [None] * len(n.values) 
n.result.extend(none_array) else: - interm_request_output_values = request_output_values[0][i][0] - n.result.extend(interm_request_output_values) + # interm_request_output_values = request_output_values[0][i][0] + n.result.extend(request_output_values.values[i]) def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index 0c5a47e6a..c99781544 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -76,6 +76,8 @@ # "axis_name": "date", # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], # }, + # {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, + # {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, { "axis_name": "values", "transformations": [ From 8acef9f5cc5e8b4c7f9349e191867198b0542110 Mon Sep 17 00:00:00 2001 From: mathleur Date: Wed, 23 Apr 2025 15:12:25 +0200 Subject: [PATCH 20/28] make prototype qubed integration work and compare against fdb --- .../datacube/backends/datacube.py | 2 - polytope_feature/datacube/backends/fdb.py | 429 +++++++++++++++++- polytope_feature/datacube/backends/qubed.py | 28 +- .../datacube_mappers/datacube_mappers.py | 2 + .../mapper_types/healpix_nested.py | 23 +- polytope_feature/engine/hullslicer.py | 3 + tests/test_qubed_extraction_engine.py | 87 +++- 7 files changed, 541 insertions(+), 33 deletions(-) diff --git a/polytope_feature/datacube/backends/datacube.py b/polytope_feature/datacube/backends/datacube.py index 22029502b..161517b02 100644 --- a/polytope_feature/datacube/backends/datacube.py +++ b/polytope_feature/datacube/backends/datacube.py @@ -169,8 +169,6 @@ def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alt if type(datacube).__name__ == 
"QubedDatacube": from .qubed import QubedDatacube # TODO: here we create the qubeddatacube twice..., which we do not want - print("WHAT ARE THE AXIS OPTIONS HERE??") - print(axis_options) qubed_datacube = QubedDatacube(datacube.q, datacube.datacube_axes, datacube.datacube_transformations, config, axis_options, compressed_axes_options, alternative_axes, context) return qubed_datacube diff --git a/polytope_feature/datacube/backends/fdb.py b/polytope_feature/datacube/backends/fdb.py index af027cd04..e2778d291 100644 --- a/polytope_feature/datacube/backends/fdb.py +++ b/polytope_feature/datacube/backends/fdb.py @@ -1,3 +1,414 @@ +# import logging +# import operator +# from copy import deepcopy +# from itertools import product + +# from ...utility.exceptions import BadGridError, BadRequestError, GribJumpNoIndexError +# from ...utility.geometry import nearest_pt +# from .datacube import Datacube, TensorIndexTree + + +# class FDBDatacube(Datacube): +# def __init__( +# self, gj, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None +# ): +# if config is None: +# config = {} +# if context is None: +# context = {} + +# super().__init__(axis_options, compressed_axes_options) +# print(axis_options) + +# logging.info("Created an FDB datacube with options: " + str(axis_options)) + +# self.unwanted_path = {} +# self.axis_options = axis_options + +# partial_request = config +# # Find values in the level 3 FDB datacube + +# self.gj = gj +# if len(alternative_axes) == 0: +# logging.info("Find GribJump axes for %s", context) +# self.fdb_coordinates = self.gj.axes(partial_request, ctx=context) +# logging.info("Retrieved available GribJump axes for %s", context) +# if len(self.fdb_coordinates) == 0 or set(partial_request) > set(self.fdb_coordinates): +# raise BadRequestError(partial_request) +# else: +# self.fdb_coordinates = {} +# for axis_config in alternative_axes: +# self.fdb_coordinates[axis_config.axis_name] = axis_config.values + +# 
fdb_coordinates_copy = deepcopy(self.fdb_coordinates) +# for axis, vals in fdb_coordinates_copy.items(): +# if len(vals) == 1: +# if vals[0] == "": +# self.fdb_coordinates.pop(axis) + +# logging.info("Axes returned from GribJump are: " + str(self.fdb_coordinates)) + +# self.fdb_coordinates["values"] = [] +# for name, values in self.fdb_coordinates.items(): +# values.sort() +# options = None +# for opt in self.axis_options: +# if opt.axis_name == name: +# options = opt + +# self._check_and_add_axes(options, name, values) +# self.treated_axes.append(name) +# self.complete_axes.append(name) + +# # add other options to axis which were just created above like "lat" for the mapper transformations for eg +# for name in self._axes: +# if name not in self.treated_axes: +# options = None +# for opt in self.axis_options: +# if opt.axis_name == name: +# options = opt + +# val = self._axes[name].type +# self._check_and_add_axes(options, name, val) + +# logging.info("Polytope created axes for %s", self._axes.keys()) + +# def check_branching_axes(self, request): +# polytopes = request.polytopes() +# for polytope in polytopes: +# for ax in polytope._axes: +# if ax == "levtype": +# (upper, lower, idx) = polytope.extents(ax) +# if "sfc" in polytope.points[idx]: +# self.fdb_coordinates.pop("levelist", None) + +# if ax == "param": +# (upper, lower, idx) = polytope.extents(ax) +# if "140251" not in polytope.points[idx]: +# self.fdb_coordinates.pop("direction", None) +# self.fdb_coordinates.pop("frequency", None) +# else: +# # special param with direction and frequency +# if len(polytope.points[idx]) > 1: +# raise ValueError( +# "Param 251 is part of a special branching of the datacube. Please request it separately." 
# noqa: E501 +# ) +# self.fdb_coordinates.pop("quantile", None) +# self.fdb_coordinates.pop("year", None) +# self.fdb_coordinates.pop("month", None) + +# # NOTE: verify that we also remove the axis object for axes we've removed here +# axes_to_remove = set(self.complete_axes) - set(self.fdb_coordinates.keys()) + +# # Remove the keys from self._axes +# for axis_name in axes_to_remove: +# self._axes.pop(axis_name, None) + +# def get(self, requests: TensorIndexTree, context=None): +# # print("EVER GOT FDB DATA??") +# if context is None: +# context = {} +# if len(requests.children) == 0: +# return requests +# fdb_requests = [] +# fdb_requests_decoding_info = [] +# self.get_fdb_requests(requests, fdb_requests, fdb_requests_decoding_info) + +# # here, loop through the fdb requests and request from gj and directly add to the nodes +# complete_list_complete_uncompressed_requests = [] +# complete_fdb_decoding_info = [] +# for j, compressed_request in enumerate(fdb_requests): +# uncompressed_request = {} + +# # Need to determine the possible decompressed requests + +# # find the possible combinations of compressed indices +# interm_branch_tuple_values = [] +# for key in compressed_request[0].keys(): +# interm_branch_tuple_values.append(compressed_request[0][key]) +# request_combis = product(*interm_branch_tuple_values) + +# # Need to extract the possible requests and add them to the right nodes +# for combi in request_combis: +# uncompressed_request = {} +# for i, key in enumerate(compressed_request[0].keys()): +# uncompressed_request[key] = combi[i] +# complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash) +# complete_list_complete_uncompressed_requests.append(complete_uncompressed_request) +# complete_fdb_decoding_info.append(fdb_requests_decoding_info[j]) + +# if logging.root.level <= logging.DEBUG: +# printed_list_to_gj = complete_list_complete_uncompressed_requests[::1000] +# logging.debug("The requests we give GribJump are: 
%s", printed_list_to_gj) +# logging.info("Requests given to GribJump extract for %s", context) +# try: +# output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) +# except Exception as e: +# if "BadValue: Grid hash mismatch" in str(e): +# logging.info("Error is: %s", e) +# raise BadGridError() +# if "Missing JumpInfo" in str(e): +# logging.info("Error is: %s", e) +# raise GribJumpNoIndexError() +# else: +# raise e + +# logging.info("Requests extracted from GribJump for %s", context) +# if logging.root.level <= logging.DEBUG: +# printed_output_values = output_values[::1000] +# logging.debug("GribJump outputs: %s", printed_output_values) +# self.assign_fdb_output_to_nodes(output_values, complete_fdb_decoding_info) + +# def get_fdb_requests( +# self, +# requests: TensorIndexTree, +# fdb_requests=[], +# fdb_requests_decoding_info=[], +# leaf_path=None, +# ): +# if leaf_path is None: +# leaf_path = {} + +# # First when request node is root, go to its children +# if requests.axis.name == "root": +# logging.debug("Looking for data for the tree") + +# for c in requests.children: +# self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info) +# # If request node has no children, we have a leaf so need to assign fdb values to it +# else: +# key_value_path = {requests.axis.name: requests.values} +# ax = requests.axis +# (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( +# key_value_path, leaf_path, self.unwanted_path +# ) +# leaf_path.update(key_value_path) +# if len(requests.children[0].children[0].children) == 0: +# # find the fdb_requests and associated nodes to which to add results +# (path, current_start_idxs, fdb_node_ranges, lat_length) = self.get_2nd_last_values(requests, leaf_path) +# ( +# original_indices, +# sorted_request_ranges, +# fdb_node_ranges, +# ) = self.sort_fdb_request_ranges(current_start_idxs, lat_length, fdb_node_ranges) +# fdb_requests.append((path, sorted_request_ranges)) +# 
fdb_requests_decoding_info.append((original_indices, fdb_node_ranges)) + +# # Otherwise remap the path for this key and iterate again over children +# else: +# for c in requests.children: +# self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path) + +# def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs): +# seen_indices = set() +# for i, idxs_list in enumerate(current_start_idxs): +# for k, sub_lat_idxs in enumerate(idxs_list): +# actual_fdb_node = fdb_node_ranges[i][k] +# original_fdb_node_range_vals = [] +# new_current_start_idx = [] +# for j, idx in enumerate(sub_lat_idxs): +# if idx not in seen_indices: +# # NOTE: need to remove it from the values in the corresponding tree node +# # NOTE: need to read just the range we give to gj +# original_fdb_node_range_vals.append(actual_fdb_node[0].values[j]) +# seen_indices.add(idx) +# new_current_start_idx.append(idx) +# if original_fdb_node_range_vals != []: +# actual_fdb_node[0].values = tuple(original_fdb_node_range_vals) +# else: +# # there are no values on this node anymore so can remove it +# actual_fdb_node[0].remove_branch() +# if len(new_current_start_idx) == 0: +# current_start_idxs[i].pop(k) +# else: +# current_start_idxs[i][k] = new_current_start_idx +# return (fdb_node_ranges, current_start_idxs) + +# def nearest_lat_lon_search(self, requests): +# if len(self.nearest_search) != 0: +# first_ax_name = requests.children[0].axis.name +# second_ax_name = requests.children[0].children[0].axis.name + +# axes_in_nearest_search = [ +# first_ax_name not in self.nearest_search.keys(), +# second_ax_name not in self.nearest_search.keys(), +# ] + +# if all(not item for item in axes_in_nearest_search): +# raise Exception("nearest point search axes are wrong") + +# second_ax = requests.children[0].children[0].axis + +# nearest_pts = self.nearest_search.get((first_ax_name, second_ax_name), None) +# if nearest_pts is None: +# nearest_pts = 
self.nearest_search.get((second_ax_name, first_ax_name), None) +# for i, pt in enumerate(nearest_pts): +# nearest_pts[i] = [pt[1], pt[0]] + +# transformed_nearest_pts = [] +# for point in nearest_pts: +# transformed_nearest_pts.append([point[0], second_ax._remap_val_to_axis_range(point[1])]) + +# found_latlon_pts = [] +# for lat_child in requests.children: +# for lon_child in lat_child.children: +# found_latlon_pts.append([lat_child.values, lon_child.values]) + +# # now find the nearest lat lon to the points requested +# nearest_latlons = [] +# for pt in transformed_nearest_pts: +# nearest_latlon = nearest_pt(found_latlon_pts, pt) +# nearest_latlons.append(nearest_latlon) + +# # need to remove the branches that do not fit +# lat_children_values = [child.values for child in requests.children] +# for i in range(len(lat_children_values)): +# lat_child_val = lat_children_values[i] +# lat_child = [child for child in requests.children if child.values == lat_child_val][0] +# if lat_child.values not in [(latlon[0],) for latlon in nearest_latlons]: +# lat_child.remove_branch() +# else: +# possible_lons = [latlon[1] for latlon in nearest_latlons if (latlon[0],) == lat_child.values] +# lon_children_values = [child.values for child in lat_child.children] +# for j in range(len(lon_children_values)): +# lon_child_val = lon_children_values[j] +# lon_child = [child for child in lat_child.children if child.values == lon_child_val][0] +# for value in lon_child.values: +# if value not in possible_lons: +# lon_child.remove_compressed_branch(value) + +# def get_2nd_last_values(self, requests, leaf_path=None): +# if leaf_path is None: +# leaf_path = {} +# # In this function, we recursively loop over the last two layers of the tree and store the indices of the +# # request ranges in those layers +# self.nearest_lat_lon_search(requests) + +# lat_length = len(requests.children) +# current_start_idxs = [False] * lat_length +# fdb_node_ranges = [False] * lat_length +# for i in 
range(len(requests.children)): +# lat_child = requests.children[i] +# lon_length = len(lat_child.children) +# current_start_idxs[i] = [None] * lon_length +# fdb_node_ranges[i] = [[TensorIndexTree.root for y in range(lon_length)] for x in range(lon_length)] +# current_start_idx = deepcopy(current_start_idxs[i]) +# fdb_range_nodes = deepcopy(fdb_node_ranges[i]) +# key_value_path = {lat_child.axis.name: lat_child.values} +# ax = lat_child.axis +# (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( +# key_value_path, leaf_path, self.unwanted_path +# ) +# leaf_path.update(key_value_path) +# (current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf( +# lat_child, leaf_path, current_start_idx, fdb_range_nodes +# ) + +# leaf_path_copy = deepcopy(leaf_path) +# leaf_path_copy.pop("values", None) +# return (leaf_path_copy, current_start_idxs, fdb_node_ranges, lat_length) + +# def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range_n): +# current_idx = [[] for i in range(len(requests.children))] +# fdb_range_n = [[] for i in range(len(requests.children))] +# for i, c in enumerate(requests.children): +# # now c are the leaves of the initial tree +# key_value_path = {c.axis.name: c.values} +# ax = c.axis +# (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( +# key_value_path, leaf_path, self.unwanted_path +# ) +# # TODO: change this to accommodate non consecutive indexes being compressed too +# current_idx[i].extend(key_value_path["values"]) +# fdb_range_n[i].append(c) +# return (current_idx, fdb_range_n) + +# def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): +# for k in range(len(output_values)): +# request_output_values = output_values[k] +# ( +# original_indices, +# fdb_node_ranges, +# ) = fdb_requests_decoding_info[k] +# sorted_fdb_range_nodes = [fdb_node_ranges[i] for i in original_indices] +# for i in range(len(sorted_fdb_range_nodes)): +# n = 
sorted_fdb_range_nodes[i][0] +# if len(request_output_values[0]) == 0: +# # If we are here, no data was found for this path in the fdb +# none_array = [None] * len(n.values) +# n.result.extend(none_array) +# else: +# interm_request_output_values = request_output_values[0][i][0] +# n.result.extend(interm_request_output_values) + +# def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): +# (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( +# fdb_node_ranges, current_start_idx +# ) +# interm_request_ranges = [] +# # TODO: modify the start indexes to have as many arrays as the request ranges +# new_fdb_node_ranges = [] +# for i in range(lat_length): +# interm_fdb_nodes = fdb_node_ranges[i] +# old_interm_start_idx = current_start_idx[i] +# for j in range(len(old_interm_start_idx)): +# # TODO: if we sorted the cyclic values in increasing order on the tree too, +# # then we wouldn't have to sort here? +# sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1]) +# original_indices_idx, interm_start_idx = zip(*sorted_list) +# for interm_fdb_nodes_obj in interm_fdb_nodes[j]: +# interm_fdb_nodes_obj.values = tuple([interm_fdb_nodes_obj.values[k] for k in original_indices_idx]) +# if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx): +# current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1) +# interm_request_ranges.append(current_request_ranges) +# new_fdb_node_ranges.append(interm_fdb_nodes[j]) +# else: +# jumps = list(map(operator.sub, interm_start_idx[1:], interm_start_idx[:-1])) +# last_idx = 0 +# for k, jump in enumerate(jumps): +# if jump > 1: +# current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[k] + 1) +# new_fdb_node_ranges.append(interm_fdb_nodes[j]) +# last_idx = k + 1 +# interm_request_ranges.append(current_request_ranges) +# if k == len(interm_start_idx) - 2: +# current_request_ranges = (interm_start_idx[last_idx], 
interm_start_idx[-1] + 1) +# interm_request_ranges.append(current_request_ranges) +# new_fdb_node_ranges.append(interm_fdb_nodes[j]) +# request_ranges_with_idx = list(enumerate(interm_request_ranges)) +# sorted_list = sorted(request_ranges_with_idx, key=lambda x: x[1][0]) +# original_indices, sorted_request_ranges = zip(*sorted_list) +# return (original_indices, sorted_request_ranges, new_fdb_node_ranges) + +# def datacube_natural_indexes(self, axis, subarray): +# indexes = subarray.get(axis.name, None) +# return indexes + +# def select(self, path, unmapped_path): +# return self.fdb_coordinates + +# def ax_vals(self, name): +# return self.fdb_coordinates.get(name, None) + +# def prep_tree_encoding(self, node, unwanted_path=None): +# # TODO: prepare the tree for protobuf encoding +# # ie transform all axes for gribjump and adding the index property on the leaves +# if unwanted_path is None: +# unwanted_path = {} + +# ax = node.axis +# (new_node, unwanted_path) = ax.unmap_tree_node(node, unwanted_path) + +# if len(node.children) != 0: +# for c in new_node.children: +# self.prep_tree_encoding(c, unwanted_path) + +# def prep_tree_decoding(self, tree): +# # TODO: transform the tree after decoding from protobuf +# # ie unstransform all axes from gribjump and put the indexes back as a leaf/extra node +# pass + + import logging import operator from copy import deepcopy @@ -18,7 +429,6 @@ def __init__( context = {} super().__init__(axis_options, compressed_axes_options) - print(axis_options) logging.info("Created an FDB datacube with options: " + str(axis_options)) @@ -141,7 +551,7 @@ def get(self, requests: TensorIndexTree, context=None): logging.debug("The requests we give GribJump are: %s", printed_list_to_gj) logging.info("Requests given to GribJump extract for %s", context) try: - output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) + iterator = self.gj.extract(complete_list_complete_uncompressed_requests, context) except Exception as 
e: if "BadValue: Grid hash mismatch" in str(e): logging.info("Error is: %s", e) @@ -153,10 +563,7 @@ def get(self, requests: TensorIndexTree, context=None): raise e logging.info("Requests extracted from GribJump for %s", context) - if logging.root.level <= logging.DEBUG: - printed_output_values = output_values[::1000] - logging.debug("GribJump outputs: %s", printed_output_values) - self.assign_fdb_output_to_nodes(output_values, complete_fdb_decoding_info) + self.assign_fdb_output_to_nodes(iterator, complete_fdb_decoding_info) def get_fdb_requests( self, @@ -322,9 +729,8 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range fdb_range_n[i].append(c) return (current_idx, fdb_range_n) - def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): - for k in range(len(output_values)): - request_output_values = output_values[k] + def assign_fdb_output_to_nodes(self, output_iterator, fdb_requests_decoding_info): + for k, result in enumerate(output_iterator): ( original_indices, fdb_node_ranges, @@ -332,13 +738,12 @@ def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): sorted_fdb_range_nodes = [fdb_node_ranges[i] for i in original_indices] for i in range(len(sorted_fdb_range_nodes)): n = sorted_fdb_range_nodes[i][0] - if len(request_output_values[0]) == 0: + if len(result.values) == 0: # If we are here, no data was found for this path in the fdb none_array = [None] * len(n.values) n.result.extend(none_array) else: - interm_request_output_values = request_output_values[0][i][0] - n.result.extend(interm_request_output_values) + n.result.extend(result.values[i]) def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index 9a1518813..dde9a9897 100644 --- 
a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -5,6 +5,7 @@ from ...utility.exceptions import BadGridError, BadRequestError, GribJumpNoIndexError from ...utility.geometry import nearest_pt import pygribjump as pygj +from qubed.value_types import QEnum from .datacube import Datacube, TensorIndexTree @@ -76,8 +77,8 @@ def __init__( # def get(self, requests: TensorIndexTree, context): # # TODO: use GJ to extract data from an fdb # return requests - print("WHAT's INSIDE OF FDB?") - print(self.gj.axes({"class": "d1", "model": "ifs-nemo", "resolution": "high"})) + # print("WHAT's INSIDE OF FDB?") + # print(self.gj.axes({"class": "d1", "model": "ifs-nemo", "resolution": "high"})) def get(self, requests, context=None): if context is None: @@ -118,8 +119,8 @@ def get(self, requests, context=None): logging.debug("The requests we give GribJump are: %s", printed_list_to_gj) logging.info("Requests given to GribJump extract for %s", context) try: - print("HER ELOOOK NOW WHAT WE GIVE TO GJ") - print(complete_list_complete_uncompressed_requests) + # print("HER ELOOOK NOW WHAT WE GIVE TO GJ") + # print(complete_list_complete_uncompressed_requests) output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) except Exception as e: if "BadValue: Grid hash mismatch" in str(e): @@ -308,9 +309,12 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range # now c are the leaves of the initial tree key_value_path = {c.key: list(c.values)} ax = self._axes[c.key] + # print("LOOK HERE IF WE HAVE SAME NUM VALS") + # print(list(c.values)) (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) + # print(key_value_path["values"]) # TODO: change this to accommodate non consecutive indexes being compressed too current_idx[i].extend(key_value_path["values"]) fdb_range_n[i].append(c) @@ -329,10 +333,16 @@ def 
assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): if len(request_output_values.values) == 0: # If we are here, no data was found for this path in the fdb none_array = [None] * len(n.values) - n.result.extend(none_array) + # n.result.extend(none_array) + if n.data.metadata.get("result", None) is None: + n.data.metadata["result"] = [] + n.data.metadata["result"].extend(none_array) else: # interm_request_output_values = request_output_values[0][i][0] - n.result.extend(request_output_values.values[i]) + # n.result.extend(request_output_values.values[i]) + if n.data.metadata.get("result", None) is None: + n.data.metadata["result"] = [] + n.data.metadata["result"].extend(request_output_values.values[i]) def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( @@ -350,9 +360,9 @@ def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1]) original_indices_idx, interm_start_idx = zip(*sorted_list) # TODO: !!!!!!! 
should really sort the values here again - # for interm_fdb_nodes_obj in interm_fdb_nodes[j]: - # interm_fdb_nodes_obj.values = tuple([list(interm_fdb_nodes_obj.values)[k] - # for k in original_indices_idx]) + for interm_fdb_nodes_obj in interm_fdb_nodes[j]: + interm_fdb_nodes_obj.data.values = QEnum(tuple([list(interm_fdb_nodes_obj.values)[k] + for k in original_indices_idx])) if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx): current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1) interm_request_ranges.append(current_request_ranges) diff --git a/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py b/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py index c5a2b551c..e06c8fd38 100644 --- a/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/datacube_mappers.py @@ -106,6 +106,8 @@ def unmap_path_key(self, key_value_path, leaf_path, unwanted_path, axis): if axis.name == self._mapped_axes()[1]: first_val = unwanted_path[self._mapped_axes()[0]] # unmapped_idx = [self.unmap(first_val, (val,)) for val in value] + # print("AND HERE??") + # print(values) unmapped_idx = self.unmap(first_val, values) leaf_path.pop(self._mapped_axes()[0], None) key_value_path.pop(axis.name) diff --git a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py index 416976e59..fe695469a 100644 --- a/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py +++ b/polytope_feature/datacube/transformations/datacube_mappers/mapper_types/healpix_nested.py @@ -137,16 +137,19 @@ def unmap_first_val_to_start_line_idx(self, first_val): else: return idx - def unmap(self, first_val, second_val, unmapped_idx=None): + def unmap(self, first_val, second_vals, 
unmapped_idx=None): tol = 1e-8 first_value = [i for i in self._first_axis_vals if first_val[0] - tol <= i <= first_val[0] + tol][0] first_idx = self._first_axis_vals.index(first_value) - second_val = [i for i in self.second_axis_vals(first_val) if second_val[0] - tol <= i <= second_val[0] + tol][0] - second_idx = self.second_axis_vals(first_val).index(second_val) - healpix_index = self.axes_idx_to_healpix_idx(first_idx, second_idx) - # TODO: here do conversion of ring to nested healpix representation before returning - healpix_index = self.ring_to_nested(healpix_index) - return [healpix_index] + healpix_idxs = [] + for second_val in second_vals: + second_val = [i for i in self.second_axis_vals(first_val) if second_val - tol <= i <= second_val + tol][0] + second_idx = self.second_axis_vals(first_val).index(second_val) + healpix_index = self.axes_idx_to_healpix_idx(first_idx, second_idx) + # TODO: here do conversion of ring to nested healpix representation before returning + healpix_index = self.ring_to_nested(healpix_index) + healpix_idxs.append(healpix_index) + return healpix_idxs def div_03(self, a, b): t = 1 if a >= (b << 1) else 0 @@ -222,4 +225,8 @@ def int_sqrt(self, i): # md5 grid hash in form {resolution : hash} -_md5_hash = {} +_md5_hash = { + 1024: "cbda19e48d4d7e5e22641154878b9b22", + 512: "47efaa0853e70948a41d5225e7653194", + 128: "f3dfeb7a5bbbdd13a20d10fdb3797c71", +} diff --git a/polytope_feature/engine/hullslicer.py b/polytope_feature/engine/hullslicer.py index efbdd21df..80feffd6d 100644 --- a/polytope_feature/engine/hullslicer.py +++ b/polytope_feature/engine/hullslicer.py @@ -139,6 +139,9 @@ def _build_branch(self, ax, node, datacube, next_nodes): self._build_unsliceable_child(polytope, ax, node, datacube, [lower], next_nodes, slice_axis_idx) else: values = self.find_values_between(polytope, ax, node, datacube, lower, upper) + # print(ax.name) + # print((lower, upper)) + # print(values) # NOTE: need to only remove the branches if the values are 
empty, # but only if there are no other possible children left in the tree that # we can append and if somehow this happens before and we need to remove, then what do we do?? diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index c99781544..2c720e50e 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -2,6 +2,7 @@ from polytope_feature.polytope import Polytope, Request from polytope_feature.engine.qubed_slicer import QubedSlicer from polytope_feature.datacube.backends.qubed import QubedDatacube +from polytope_feature.datacube.backends.fdb import FDBDatacube import pytest from qubed import Qube import requests @@ -12,6 +13,9 @@ from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper from polytope_feature.shapes import ConvexPolytope +import time +import pygribjump as gj +from polytope_feature.engine.hullslicer import HullSlicer fdb_tree = Qube.from_json(requests.get( @@ -131,7 +135,7 @@ ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])) + ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]])) qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) slicer = QubedSlicer() @@ -140,8 +144,87 @@ engine=slicer, options=options, ) +time1 = time.time() result = self_API.retrieve(request) +time2 = time.time() -print(result) +print("TIME EXTRACTING USING QUBED") +print(time2 - time1) + +# USING NORMAL GJ + + +options = { + "axis_config": [ + {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, + # { + # "axis_name": "date", + # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", 
"00"]}], + # }, + {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, + {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, + { + "axis_name": "values", + "transformations": [ + {"name": "mapper", "type": "healpix_nested", "resolution": 1024, "axes": ["latitude", "longitude"]} + ], + }, + {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, + {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, + ], + "compressed_axes_config": [ + "longitude", + # "latitude", + # "levtype", + # "step", + # "date", + # "domain", + # "expver", + # "param", + # "class", + # "stream", + # "type", + ], + "pre_path": {"class": "d1", "model": "ifs-nemo", "resolution": "high"}, +} + +fdbdatacube = gj.GribJump() +slicer = HullSlicer() +self_API = Polytope( + datacube=fdbdatacube, + engine=slicer, + options=options, +) + + +request = Request(ConvexPolytope(["param"], [["164"]]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), + ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + ConvexPolytope(["model"], [['ifs-nemo']]), + ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], ["1"]), + ConvexPolytope(["expver"], [['0001']]), + ConvexPolytope(["experiment"], [['ssp3-7.0']]), + ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + ConvexPolytope(["activity"], [["scenariomip"]]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]])) + +time3 = time.time() +result = self_API.retrieve(request) +time4 = time.time() + +print("TIME EXTRACTING USING GJ NORMAL") +print(time4 - time3) + + +# print(result) + +# print(result.leaves) # sliced_tree = actual_slice(fdb_tree, 
combi_polytopes, datacube_axes, datacube_transformations) From 2e5c080da7e21678aa2d638dcd7a622b2fcb0290 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 24 Apr 2025 15:22:14 +0200 Subject: [PATCH 21/28] add the service part of the qubed like the pre-path --- tests/test_qubed_extraction_engine.py | 18 ++ tests/test_qubed_extraction_service.py | 269 +++++++++++++++++++++++++ 2 files changed, 287 insertions(+) create mode 100644 tests/test_qubed_extraction_service.py diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index 2c720e50e..991688e84 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -18,6 +18,24 @@ from polytope_feature.engine.hullslicer import HullSlicer +def find_relevant_subcube_from_request(request, qube_url): + + # NOTE: final url we want is like: + # "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?class=d1&dataset=climate-dt" + + for shape in request.shapes: + if isinstance(shape, Select): + qube_url += shape.axis + "=" + for i, val in enumerate(shape.values): + qube_url += str(val) + if i < len(shape.values) - 1: + qube_url += "," + qube_url += "&" + # TODO: remove last unnecessary & + qube_url = qube_url[:-1] + return qube_url + + fdb_tree = Qube.from_json(requests.get( "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) diff --git a/tests/test_qubed_extraction_service.py b/tests/test_qubed_extraction_service.py new file mode 100644 index 000000000..2781c3162 --- /dev/null +++ b/tests/test_qubed_extraction_service.py @@ -0,0 +1,269 @@ +from polytope_feature.shapes import Box, Select, Span +from polytope_feature.polytope import Polytope, Request +from polytope_feature.engine.qubed_slicer import QubedSlicer +from polytope_feature.datacube.backends.qubed import QubedDatacube +from polytope_feature.datacube.backends.fdb import FDBDatacube +import pytest +from qubed import Qube +import requests 
+from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis +from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta +import pandas as pd +from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper + +from polytope_feature.shapes import ConvexPolytope +import time +import pygribjump as gj +from polytope_feature.engine.hullslicer import HullSlicer + + +def find_relevant_subcube_from_request(request, qube_url): + + # NOTE: final url we want is like: + # "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?class=d1&dataset=climate-dt" + + for shape in request.shapes: + if isinstance(shape, Select): + qube_url += shape.axis + "=" + for i, val in enumerate(shape.values): + qube_url += str(val) + if i < len(shape.values) - 1: + qube_url += "," + qube_url += "&" + # TODO: remove last unnecessary & + qube_url = qube_url[:-1] + return qube_url + + +def get_fdb_tree(request): + qube_url_start = "https://qubed.lumi.apps.dte.destination-earth.eu/api/v1/select/climate-dt/?" 
+ qube_url = find_relevant_subcube_from_request(request, qube_url_start) + fdb_tree = Qube.from_json(requests.get(qube_url).json()) + return fdb_tree + + +fdb_tree = Qube.from_json(requests.get( + "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) + + +# print(fdb_tree) + +# combi_polytopes = [ +# ConvexPolytope(["param"], [["164"]]), +# ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), +# ConvexPolytope(["resolution"], [["high"]]), +# ConvexPolytope(["type"], [["fc"]]), +# ConvexPolytope(["model"], [['ifs-nemo']]), +# ConvexPolytope(["stream"], [["clte"]]), +# ConvexPolytope(["realization"], ["1"]), +# ConvexPolytope(["expver"], [['0001']]), +# ConvexPolytope(["experiment"], [['ssp3-7.0']]), +# ConvexPolytope(["generation"], [["1"]]), +# ConvexPolytope(["levtype"], [["sfc"]]), +# ConvexPolytope(["activity"], [["scenariomip"]]), +# ConvexPolytope(["dataset"], [["climate-dt"]]), +# ConvexPolytope(["class"], [["d1"]]), +# ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), +# ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) +# ] + +# TODO: add lat and lon axes +datacube_axes = {"param": UnsliceableDatacubeAxis(), + "time": PandasTimedeltaDatacubeAxis(), + "resolution": UnsliceableDatacubeAxis(), + "type": UnsliceableDatacubeAxis(), + "model": UnsliceableDatacubeAxis(), + "stream": UnsliceableDatacubeAxis(), + "realization": UnsliceableDatacubeAxis(), + "expver": UnsliceableDatacubeAxis(), + "experiment": UnsliceableDatacubeAxis(), + "generation": UnsliceableDatacubeAxis(), + "levtype": UnsliceableDatacubeAxis(), + "activity": UnsliceableDatacubeAxis(), + "dataset": UnsliceableDatacubeAxis(), + "class": UnsliceableDatacubeAxis(), + "date": PandasTimestampDatacubeAxis(), + "latitude": FloatDatacubeAxis(), + "longitude": FloatDatacubeAxis()} + +time_val = pd.Timedelta(hours=0, minutes=0) +date_val = 
pd.Timestamp("20300101T000000") + + +# TODO: add grid axis transformation +datacube_transformations = { + "time": TypeChangeStrToTimedelta("time", time_val), + "date": TypeChangeStrToTimestamp("date", date_val), + "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) +} + + +options = { + "axis_config": [ + {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, + # { + # "axis_name": "date", + # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], + # }, + # {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, + # {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, + { + "axis_name": "values", + "transformations": [ + {"name": "mapper", "type": "healpix_nested", "resolution": 1024, "axes": ["latitude", "longitude"]} + ], + }, + {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, + {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, + ], + "compressed_axes_config": [ + "longitude", + "latitude", + "levtype", + "step", + "date", + "domain", + "expver", + "param", + "class", + "stream", + "type", + ], + "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"}, +} + +# request = Request( +# Select("step", [0]), +# Select("levtype", ["sfc"]), +# Select("date", [pd.Timestamp("20230625T120000")]), +# Select("domain", ["g"]), +# Select("expver", ["0001"]), +# Select("param", ["167"]), +# Select("class", ["od"]), +# Select("stream", ["oper"]), +# Select("type", ["an"]), +# Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), +# ) + +request = Request( + # ConvexPolytope(["param"], [["164"]]), + Select("param", ["164"]), + ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), + 
ConvexPolytope(["resolution"], [["high"]]), + ConvexPolytope(["type"], [["fc"]]), + # ConvexPolytope(["model"], [['ifs-nemo']]), + Select("model", ["ifs-nemo"]), + Select("stream", ["clte"]), + # ConvexPolytope(["stream"], [["clte"]]), + ConvexPolytope(["realization"], ["1"]), + ConvexPolytope(["expver"], [['0001']]), + ConvexPolytope(["experiment"], [['ssp3-7.0']]), + ConvexPolytope(["generation"], [["1"]]), + ConvexPolytope(["levtype"], [["sfc"]]), + # ConvexPolytope(["activity"], [["scenariomip"]]), + Select("activity", ["scenariomip"]), + ConvexPolytope(["dataset"], [["climate-dt"]]), + ConvexPolytope(["class"], [["d1"]]), + ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), + ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]])) + +fdb_tree = get_fdb_tree(request) + +print("HERE WE HAVE THE FDB TREE") +print(fdb_tree) + +qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) +slicer = QubedSlicer() +self_API = Polytope( + datacube=qubeddatacube, + engine=slicer, + options=options, +) +time1 = time.time() +result = self_API.retrieve(request) +time2 = time.time() + +# print(result) + +print("TIME EXTRACTING USING QUBED") +print(time2 - time1) + +# # USING NORMAL GJ + + +# options = { +# "axis_config": [ +# {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, +# {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, +# # { +# # "axis_name": "date", +# # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], +# # }, +# {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, +# {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, +# { +# "axis_name": "values", +# "transformations": [ +# {"name": "mapper", "type": "healpix_nested", "resolution": 1024, "axes": ["latitude", "longitude"]} +# ], +# }, +# {"axis_name": "latitude", "transformations": [{"name": "reverse", 
"is_reverse": True}]}, +# {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, +# ], +# "compressed_axes_config": [ +# "longitude", +# # "latitude", +# # "levtype", +# # "step", +# # "date", +# # "domain", +# # "expver", +# # "param", +# # "class", +# # "stream", +# # "type", +# ], +# "pre_path": {"class": "d1", "model": "ifs-nemo", "resolution": "high"}, +# } + +# fdbdatacube = gj.GribJump() +# slicer = HullSlicer() +# self_API = Polytope( +# datacube=fdbdatacube, +# engine=slicer, +# options=options, +# ) + + +# request = Request(ConvexPolytope(["param"], [["164"]]), +# ConvexPolytope(["time"], [[pd.Timedelta(hours=1, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]), +# ConvexPolytope(["resolution"], [["high"]]), +# ConvexPolytope(["type"], [["fc"]]), +# ConvexPolytope(["model"], [['ifs-nemo']]), +# ConvexPolytope(["stream"], [["clte"]]), +# ConvexPolytope(["realization"], ["1"]), +# ConvexPolytope(["expver"], [['0001']]), +# ConvexPolytope(["experiment"], [['ssp3-7.0']]), +# ConvexPolytope(["generation"], [["1"]]), +# ConvexPolytope(["levtype"], [["sfc"]]), +# ConvexPolytope(["activity"], [["scenariomip"]]), +# ConvexPolytope(["dataset"], [["climate-dt"]]), +# ConvexPolytope(["class"], [["d1"]]), +# ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), +# ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]])) + +# time3 = time.time() +# result = self_API.retrieve(request) +# time4 = time.time() + +# print("TIME EXTRACTING USING GJ NORMAL") +# print(time4 - time3) + + +# # print(result) + +# # print(result.leaves) + +# # sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) From a8a06c9e519043e97caeba779bed03f039f4b2e0 Mon Sep 17 00:00:00 2001 From: mathleur Date: Fri, 25 Apr 2025 10:16:48 +0200 Subject: [PATCH 22/28] clean up --- polytope_feature/datacube/backends/fdb.py | 411 ------------------ polytope_feature/datacube/backends/qubed.py | 24 - 
.../backends/test_qubed_extraction.py | 24 - .../datacube/backends/test_qubed_slicing.py | 305 ------------- .../engine/qubed_polytope_intersection.py | 87 ---- tests/test_qubed_extraction.py | 178 ++++---- tests/test_qubed_extraction_service.py | 2 +- 7 files changed, 90 insertions(+), 941 deletions(-) delete mode 100644 polytope_feature/datacube/backends/test_qubed_extraction.py delete mode 100644 polytope_feature/datacube/backends/test_qubed_slicing.py delete mode 100644 polytope_feature/engine/qubed_polytope_intersection.py diff --git a/polytope_feature/datacube/backends/fdb.py b/polytope_feature/datacube/backends/fdb.py index e2778d291..b69138c8d 100644 --- a/polytope_feature/datacube/backends/fdb.py +++ b/polytope_feature/datacube/backends/fdb.py @@ -1,414 +1,3 @@ -# import logging -# import operator -# from copy import deepcopy -# from itertools import product - -# from ...utility.exceptions import BadGridError, BadRequestError, GribJumpNoIndexError -# from ...utility.geometry import nearest_pt -# from .datacube import Datacube, TensorIndexTree - - -# class FDBDatacube(Datacube): -# def __init__( -# self, gj, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None -# ): -# if config is None: -# config = {} -# if context is None: -# context = {} - -# super().__init__(axis_options, compressed_axes_options) -# print(axis_options) - -# logging.info("Created an FDB datacube with options: " + str(axis_options)) - -# self.unwanted_path = {} -# self.axis_options = axis_options - -# partial_request = config -# # Find values in the level 3 FDB datacube - -# self.gj = gj -# if len(alternative_axes) == 0: -# logging.info("Find GribJump axes for %s", context) -# self.fdb_coordinates = self.gj.axes(partial_request, ctx=context) -# logging.info("Retrieved available GribJump axes for %s", context) -# if len(self.fdb_coordinates) == 0 or set(partial_request) > set(self.fdb_coordinates): -# raise BadRequestError(partial_request) -# else: 
-# self.fdb_coordinates = {} -# for axis_config in alternative_axes: -# self.fdb_coordinates[axis_config.axis_name] = axis_config.values - -# fdb_coordinates_copy = deepcopy(self.fdb_coordinates) -# for axis, vals in fdb_coordinates_copy.items(): -# if len(vals) == 1: -# if vals[0] == "": -# self.fdb_coordinates.pop(axis) - -# logging.info("Axes returned from GribJump are: " + str(self.fdb_coordinates)) - -# self.fdb_coordinates["values"] = [] -# for name, values in self.fdb_coordinates.items(): -# values.sort() -# options = None -# for opt in self.axis_options: -# if opt.axis_name == name: -# options = opt - -# self._check_and_add_axes(options, name, values) -# self.treated_axes.append(name) -# self.complete_axes.append(name) - -# # add other options to axis which were just created above like "lat" for the mapper transformations for eg -# for name in self._axes: -# if name not in self.treated_axes: -# options = None -# for opt in self.axis_options: -# if opt.axis_name == name: -# options = opt - -# val = self._axes[name].type -# self._check_and_add_axes(options, name, val) - -# logging.info("Polytope created axes for %s", self._axes.keys()) - -# def check_branching_axes(self, request): -# polytopes = request.polytopes() -# for polytope in polytopes: -# for ax in polytope._axes: -# if ax == "levtype": -# (upper, lower, idx) = polytope.extents(ax) -# if "sfc" in polytope.points[idx]: -# self.fdb_coordinates.pop("levelist", None) - -# if ax == "param": -# (upper, lower, idx) = polytope.extents(ax) -# if "140251" not in polytope.points[idx]: -# self.fdb_coordinates.pop("direction", None) -# self.fdb_coordinates.pop("frequency", None) -# else: -# # special param with direction and frequency -# if len(polytope.points[idx]) > 1: -# raise ValueError( -# "Param 251 is part of a special branching of the datacube. Please request it separately." 
# noqa: E501 -# ) -# self.fdb_coordinates.pop("quantile", None) -# self.fdb_coordinates.pop("year", None) -# self.fdb_coordinates.pop("month", None) - -# # NOTE: verify that we also remove the axis object for axes we've removed here -# axes_to_remove = set(self.complete_axes) - set(self.fdb_coordinates.keys()) - -# # Remove the keys from self._axes -# for axis_name in axes_to_remove: -# self._axes.pop(axis_name, None) - -# def get(self, requests: TensorIndexTree, context=None): -# # print("EVER GOT FDB DATA??") -# if context is None: -# context = {} -# if len(requests.children) == 0: -# return requests -# fdb_requests = [] -# fdb_requests_decoding_info = [] -# self.get_fdb_requests(requests, fdb_requests, fdb_requests_decoding_info) - -# # here, loop through the fdb requests and request from gj and directly add to the nodes -# complete_list_complete_uncompressed_requests = [] -# complete_fdb_decoding_info = [] -# for j, compressed_request in enumerate(fdb_requests): -# uncompressed_request = {} - -# # Need to determine the possible decompressed requests - -# # find the possible combinations of compressed indices -# interm_branch_tuple_values = [] -# for key in compressed_request[0].keys(): -# interm_branch_tuple_values.append(compressed_request[0][key]) -# request_combis = product(*interm_branch_tuple_values) - -# # Need to extract the possible requests and add them to the right nodes -# for combi in request_combis: -# uncompressed_request = {} -# for i, key in enumerate(compressed_request[0].keys()): -# uncompressed_request[key] = combi[i] -# complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash) -# complete_list_complete_uncompressed_requests.append(complete_uncompressed_request) -# complete_fdb_decoding_info.append(fdb_requests_decoding_info[j]) - -# if logging.root.level <= logging.DEBUG: -# printed_list_to_gj = complete_list_complete_uncompressed_requests[::1000] -# logging.debug("The requests we give GribJump are: 
%s", printed_list_to_gj) -# logging.info("Requests given to GribJump extract for %s", context) -# try: -# output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) -# except Exception as e: -# if "BadValue: Grid hash mismatch" in str(e): -# logging.info("Error is: %s", e) -# raise BadGridError() -# if "Missing JumpInfo" in str(e): -# logging.info("Error is: %s", e) -# raise GribJumpNoIndexError() -# else: -# raise e - -# logging.info("Requests extracted from GribJump for %s", context) -# if logging.root.level <= logging.DEBUG: -# printed_output_values = output_values[::1000] -# logging.debug("GribJump outputs: %s", printed_output_values) -# self.assign_fdb_output_to_nodes(output_values, complete_fdb_decoding_info) - -# def get_fdb_requests( -# self, -# requests: TensorIndexTree, -# fdb_requests=[], -# fdb_requests_decoding_info=[], -# leaf_path=None, -# ): -# if leaf_path is None: -# leaf_path = {} - -# # First when request node is root, go to its children -# if requests.axis.name == "root": -# logging.debug("Looking for data for the tree") - -# for c in requests.children: -# self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info) -# # If request node has no children, we have a leaf so need to assign fdb values to it -# else: -# key_value_path = {requests.axis.name: requests.values} -# ax = requests.axis -# (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( -# key_value_path, leaf_path, self.unwanted_path -# ) -# leaf_path.update(key_value_path) -# if len(requests.children[0].children[0].children) == 0: -# # find the fdb_requests and associated nodes to which to add results -# (path, current_start_idxs, fdb_node_ranges, lat_length) = self.get_2nd_last_values(requests, leaf_path) -# ( -# original_indices, -# sorted_request_ranges, -# fdb_node_ranges, -# ) = self.sort_fdb_request_ranges(current_start_idxs, lat_length, fdb_node_ranges) -# fdb_requests.append((path, sorted_request_ranges)) -# 
fdb_requests_decoding_info.append((original_indices, fdb_node_ranges)) - -# # Otherwise remap the path for this key and iterate again over children -# else: -# for c in requests.children: -# self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path) - -# def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs): -# seen_indices = set() -# for i, idxs_list in enumerate(current_start_idxs): -# for k, sub_lat_idxs in enumerate(idxs_list): -# actual_fdb_node = fdb_node_ranges[i][k] -# original_fdb_node_range_vals = [] -# new_current_start_idx = [] -# for j, idx in enumerate(sub_lat_idxs): -# if idx not in seen_indices: -# # NOTE: need to remove it from the values in the corresponding tree node -# # NOTE: need to read just the range we give to gj -# original_fdb_node_range_vals.append(actual_fdb_node[0].values[j]) -# seen_indices.add(idx) -# new_current_start_idx.append(idx) -# if original_fdb_node_range_vals != []: -# actual_fdb_node[0].values = tuple(original_fdb_node_range_vals) -# else: -# # there are no values on this node anymore so can remove it -# actual_fdb_node[0].remove_branch() -# if len(new_current_start_idx) == 0: -# current_start_idxs[i].pop(k) -# else: -# current_start_idxs[i][k] = new_current_start_idx -# return (fdb_node_ranges, current_start_idxs) - -# def nearest_lat_lon_search(self, requests): -# if len(self.nearest_search) != 0: -# first_ax_name = requests.children[0].axis.name -# second_ax_name = requests.children[0].children[0].axis.name - -# axes_in_nearest_search = [ -# first_ax_name not in self.nearest_search.keys(), -# second_ax_name not in self.nearest_search.keys(), -# ] - -# if all(not item for item in axes_in_nearest_search): -# raise Exception("nearest point search axes are wrong") - -# second_ax = requests.children[0].children[0].axis - -# nearest_pts = self.nearest_search.get((first_ax_name, second_ax_name), None) -# if nearest_pts is None: -# nearest_pts = 
self.nearest_search.get((second_ax_name, first_ax_name), None) -# for i, pt in enumerate(nearest_pts): -# nearest_pts[i] = [pt[1], pt[0]] - -# transformed_nearest_pts = [] -# for point in nearest_pts: -# transformed_nearest_pts.append([point[0], second_ax._remap_val_to_axis_range(point[1])]) - -# found_latlon_pts = [] -# for lat_child in requests.children: -# for lon_child in lat_child.children: -# found_latlon_pts.append([lat_child.values, lon_child.values]) - -# # now find the nearest lat lon to the points requested -# nearest_latlons = [] -# for pt in transformed_nearest_pts: -# nearest_latlon = nearest_pt(found_latlon_pts, pt) -# nearest_latlons.append(nearest_latlon) - -# # need to remove the branches that do not fit -# lat_children_values = [child.values for child in requests.children] -# for i in range(len(lat_children_values)): -# lat_child_val = lat_children_values[i] -# lat_child = [child for child in requests.children if child.values == lat_child_val][0] -# if lat_child.values not in [(latlon[0],) for latlon in nearest_latlons]: -# lat_child.remove_branch() -# else: -# possible_lons = [latlon[1] for latlon in nearest_latlons if (latlon[0],) == lat_child.values] -# lon_children_values = [child.values for child in lat_child.children] -# for j in range(len(lon_children_values)): -# lon_child_val = lon_children_values[j] -# lon_child = [child for child in lat_child.children if child.values == lon_child_val][0] -# for value in lon_child.values: -# if value not in possible_lons: -# lon_child.remove_compressed_branch(value) - -# def get_2nd_last_values(self, requests, leaf_path=None): -# if leaf_path is None: -# leaf_path = {} -# # In this function, we recursively loop over the last two layers of the tree and store the indices of the -# # request ranges in those layers -# self.nearest_lat_lon_search(requests) - -# lat_length = len(requests.children) -# current_start_idxs = [False] * lat_length -# fdb_node_ranges = [False] * lat_length -# for i in 
range(len(requests.children)): -# lat_child = requests.children[i] -# lon_length = len(lat_child.children) -# current_start_idxs[i] = [None] * lon_length -# fdb_node_ranges[i] = [[TensorIndexTree.root for y in range(lon_length)] for x in range(lon_length)] -# current_start_idx = deepcopy(current_start_idxs[i]) -# fdb_range_nodes = deepcopy(fdb_node_ranges[i]) -# key_value_path = {lat_child.axis.name: lat_child.values} -# ax = lat_child.axis -# (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( -# key_value_path, leaf_path, self.unwanted_path -# ) -# leaf_path.update(key_value_path) -# (current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf( -# lat_child, leaf_path, current_start_idx, fdb_range_nodes -# ) - -# leaf_path_copy = deepcopy(leaf_path) -# leaf_path_copy.pop("values", None) -# return (leaf_path_copy, current_start_idxs, fdb_node_ranges, lat_length) - -# def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range_n): -# current_idx = [[] for i in range(len(requests.children))] -# fdb_range_n = [[] for i in range(len(requests.children))] -# for i, c in enumerate(requests.children): -# # now c are the leaves of the initial tree -# key_value_path = {c.axis.name: c.values} -# ax = c.axis -# (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( -# key_value_path, leaf_path, self.unwanted_path -# ) -# # TODO: change this to accommodate non consecutive indexes being compressed too -# current_idx[i].extend(key_value_path["values"]) -# fdb_range_n[i].append(c) -# return (current_idx, fdb_range_n) - -# def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): -# for k in range(len(output_values)): -# request_output_values = output_values[k] -# ( -# original_indices, -# fdb_node_ranges, -# ) = fdb_requests_decoding_info[k] -# sorted_fdb_range_nodes = [fdb_node_ranges[i] for i in original_indices] -# for i in range(len(sorted_fdb_range_nodes)): -# n = 
sorted_fdb_range_nodes[i][0] -# if len(request_output_values[0]) == 0: -# # If we are here, no data was found for this path in the fdb -# none_array = [None] * len(n.values) -# n.result.extend(none_array) -# else: -# interm_request_output_values = request_output_values[0][i][0] -# n.result.extend(interm_request_output_values) - -# def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges): -# (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges( -# fdb_node_ranges, current_start_idx -# ) -# interm_request_ranges = [] -# # TODO: modify the start indexes to have as many arrays as the request ranges -# new_fdb_node_ranges = [] -# for i in range(lat_length): -# interm_fdb_nodes = fdb_node_ranges[i] -# old_interm_start_idx = current_start_idx[i] -# for j in range(len(old_interm_start_idx)): -# # TODO: if we sorted the cyclic values in increasing order on the tree too, -# # then we wouldn't have to sort here? -# sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1]) -# original_indices_idx, interm_start_idx = zip(*sorted_list) -# for interm_fdb_nodes_obj in interm_fdb_nodes[j]: -# interm_fdb_nodes_obj.values = tuple([interm_fdb_nodes_obj.values[k] for k in original_indices_idx]) -# if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx): -# current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1) -# interm_request_ranges.append(current_request_ranges) -# new_fdb_node_ranges.append(interm_fdb_nodes[j]) -# else: -# jumps = list(map(operator.sub, interm_start_idx[1:], interm_start_idx[:-1])) -# last_idx = 0 -# for k, jump in enumerate(jumps): -# if jump > 1: -# current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[k] + 1) -# new_fdb_node_ranges.append(interm_fdb_nodes[j]) -# last_idx = k + 1 -# interm_request_ranges.append(current_request_ranges) -# if k == len(interm_start_idx) - 2: -# current_request_ranges = (interm_start_idx[last_idx], 
interm_start_idx[-1] + 1) -# interm_request_ranges.append(current_request_ranges) -# new_fdb_node_ranges.append(interm_fdb_nodes[j]) -# request_ranges_with_idx = list(enumerate(interm_request_ranges)) -# sorted_list = sorted(request_ranges_with_idx, key=lambda x: x[1][0]) -# original_indices, sorted_request_ranges = zip(*sorted_list) -# return (original_indices, sorted_request_ranges, new_fdb_node_ranges) - -# def datacube_natural_indexes(self, axis, subarray): -# indexes = subarray.get(axis.name, None) -# return indexes - -# def select(self, path, unmapped_path): -# return self.fdb_coordinates - -# def ax_vals(self, name): -# return self.fdb_coordinates.get(name, None) - -# def prep_tree_encoding(self, node, unwanted_path=None): -# # TODO: prepare the tree for protobuf encoding -# # ie transform all axes for gribjump and adding the index property on the leaves -# if unwanted_path is None: -# unwanted_path = {} - -# ax = node.axis -# (new_node, unwanted_path) = ax.unmap_tree_node(node, unwanted_path) - -# if len(node.children) != 0: -# for c in new_node.children: -# self.prep_tree_encoding(c, unwanted_path) - -# def prep_tree_decoding(self, tree): -# # TODO: transform the tree after decoding from protobuf -# # ie unstransform all axes from gribjump and put the indexes back as a leaf/extra node -# pass - - import logging import operator from copy import deepcopy diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index dde9a9897..cfe7b31fc 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -26,14 +26,12 @@ def __init__( self.datacube_transformations = datacube_transformations # TODO: find compressed_axes list self.compressed_axes = [] - # self._axes = datacube_axes # TODO: should the gj object be passed in instead? 
self.gj = pygj.GribJump() # TODO: this doesn't fill the axes as wanted super().__init__(axis_options, compressed_axes_options) - # self._axes = datacube_axes # TODO: where do these come from and are they right? self.unwanted_path = {} @@ -43,8 +41,6 @@ def __init__( # Find values in the level 3 FDB datacube self.fdb_coordinates = {} - print("WHAT ARE THE AXIS OPTIONS") - print(axis_options) # TODO: we instead now have a list of axes with the actual axes types... @@ -74,12 +70,6 @@ def __init__( val = self._axes[name].type self._check_and_add_axes(options, name, val) - # def get(self, requests: TensorIndexTree, context): - # # TODO: use GJ to extract data from an fdb - # return requests - # print("WHAT's INSIDE OF FDB?") - # print(self.gj.axes({"class": "d1", "model": "ifs-nemo", "resolution": "high"})) - def get(self, requests, context=None): if context is None: context = {} @@ -119,8 +109,6 @@ def get(self, requests, context=None): logging.debug("The requests we give GribJump are: %s", printed_list_to_gj) logging.info("Requests given to GribJump extract for %s", context) try: - # print("HER ELOOOK NOW WHAT WE GIVE TO GJ") - # print(complete_list_complete_uncompressed_requests) output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context) except Exception as e: if "BadValue: Grid hash mismatch" in str(e): @@ -157,14 +145,10 @@ def get_fdb_requests( # If request node has no children, we have a leaf so need to assign fdb values to it else: key_value_path = {requests.key: requests.values} - # ax = requests.axis ax = self._axes[requests.key] (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) - # print("HERE NOW LOOK") - # print(requests.key) - # print(key_value_path) # TODO: change to use the datacube trasnformations instead... 
if requests.key == "time": new_vals = [] @@ -309,12 +293,9 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range # now c are the leaves of the initial tree key_value_path = {c.key: list(c.values)} ax = self._axes[c.key] - # print("LOOK HERE IF WE HAVE SAME NUM VALS") - # print(list(c.values)) (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) - # print(key_value_path["values"]) # TODO: change this to accommodate non consecutive indexes being compressed too current_idx[i].extend(key_value_path["values"]) fdb_range_n[i].append(c) @@ -322,7 +303,6 @@ def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): for k, request_output_values in enumerate(output_values): - # request_output_values = output_values[k] ( original_indices, fdb_node_ranges, @@ -333,13 +313,10 @@ def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info): if len(request_output_values.values) == 0: # If we are here, no data was found for this path in the fdb none_array = [None] * len(n.values) - # n.result.extend(none_array) if n.data.metadata.get("result", None) is None: n.data.metadata["result"] = [] n.data.metadata["result"].extend(none_array) else: - # interm_request_output_values = request_output_values[0][i][0] - # n.result.extend(request_output_values.values[i]) if n.data.metadata.get("result", None) is None: n.data.metadata["result"] = [] n.data.metadata["result"].extend(request_output_values.values[i]) @@ -359,7 +336,6 @@ def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges # then we wouldn't have to sort here? sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1]) original_indices_idx, interm_start_idx = zip(*sorted_list) - # TODO: !!!!!!! 
should really sort the values here again for interm_fdb_nodes_obj in interm_fdb_nodes[j]: interm_fdb_nodes_obj.data.values = QEnum(tuple([list(interm_fdb_nodes_obj.values)[k] for k in original_indices_idx])) diff --git a/polytope_feature/datacube/backends/test_qubed_extraction.py b/polytope_feature/datacube/backends/test_qubed_extraction.py deleted file mode 100644 index 5d3204db5..000000000 --- a/polytope_feature/datacube/backends/test_qubed_extraction.py +++ /dev/null @@ -1,24 +0,0 @@ -from qubed import Qube -import requests - -from ...shapes import ConvexPolytope - - -fdb_tree = Qube.from_json(requests.get( - "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) - -fdb_tree.print() -combi_polytopes = [ - # ConvexPolytope() -] - -# Select("step", [0]), -# Select("levtype", ["sfc"]), -# Select("date", [pd.Timestamp("20231102T000000")]), -# Select("domain", ["g"]), -# Select("expver", ["0001"]), -# Select("param", ["167"]), -# Select("class", ["od"]), -# Select("stream", ["oper"]), -# Select("type", ["fc"]), -# Box(["latitude", "longitude"], [0, 0], [80, 80]), diff --git a/polytope_feature/datacube/backends/test_qubed_slicing.py b/polytope_feature/datacube/backends/test_qubed_slicing.py deleted file mode 100644 index 00919d324..000000000 --- a/polytope_feature/datacube/backends/test_qubed_slicing.py +++ /dev/null @@ -1,305 +0,0 @@ -from qubed import Qube -from qubed.value_types import QEnum -from qubed.set_operations import union -from ...engine.hullslicer import slice -import pandas as pd -from ..datacube_axis import UnsliceableDatacubeAxis -from ..transformations.datacube_mappers.datacube_mappers import DatacubeMapper -from ...shapes import ConvexPolytope, Product -from ...utility.combinatorics import group, tensor_product - - -def _actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': - - def find_polytopes_on_axis(axis_name, polytopes): - polytopes_on_axis = [] - for poly in 
polytopes: - if axis_name in poly._axes: - polytopes_on_axis.append(poly) - return polytopes_on_axis - - def change_poly_axis_type(axis_name, polytopes, datacube_axes): - axis = datacube_axes[axis_name] - # loop through the polytopes and change each polytopes's values according to axis - if isinstance(axis, UnsliceableDatacubeAxis): - return - - for poly in polytopes: - i = 0 - for k, ax_name in enumerate(poly._axes): - if ax_name == axis_name: - i = k - for j, val in enumerate(poly.points): - poly.points[j][i] = axis.to_float(axis.parse(poly.points[j][i])) - - def _axes_compressed(): - return {} - - def change_datacube_val_types(child: Qube, datacube_transformations): - axis_name = child.key - transformation = datacube_transformations.get(axis_name, None) - child_vals = child.values - new_vals = [] - for val in child_vals: - if transformation: - new_vals.append(transformation.transform_type(val)) - else: - new_vals.append(val) - - return new_vals - - def transform_upper_lower(axis_name, lower, upper, datacube_axes): - ax = datacube_axes[axis_name] - if isinstance(ax, UnsliceableDatacubeAxis): - return (lower, upper) - tol = ax.tol - lower = ax.from_float(lower - tol) - upper = ax.from_float(upper + tol) - - return (lower, upper) - - def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transformations, second_axis_vals) -> list[Qube]: - result = [] - polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes) - - for poly in polytopes_on_axis: - lower, upper, slice_axis_idx = poly.extents(axis_name) - - new_lower, new_upper = transform_upper_lower(axis_name, lower, upper, datacube_axes) - found_vals = [v for v in second_axis_vals if new_lower <= v <= new_upper] - - if len(found_vals) == 0: - continue - - # slice polytope along each value on child and keep resulting polytopes in memory - sliced_polys = [] - for val in found_vals: - ax = datacube_axes[axis_name] - if not isinstance(ax, UnsliceableDatacubeAxis): - fval = ax.to_float(val) - # 
slice polytope along the value and add sliced polytope to list of polytopes in memory - sliced_poly = slice(poly, axis_name, fval, slice_axis_idx) - sliced_polys.append(sliced_poly) - # decide if axis should be compressed or not according to polytope - # NOTE: actually the second grid axis will always be compressed - - # if it's not compressed, need to separate into different nodes to append to the tree - - new_found_vals = [] - for found_val in found_vals: - if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): - new_found_vals.append(str(found_val)) - else: - new_found_vals.append(found_val) - - # NOTE this was the last axis so we do not have children... - - result.extend([Qube.make( - key=axis_name, - values=QEnum(new_found_vals), - metadata={}, - children={} - )]) - return result - - def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: - result = [] - - if len(q.children) == 0: - # add "fake" axes and their nodes in order -> what about merged axes?? 
- mapper_transformation = None - for transformation in list(datacube_transformations.values()): - if isinstance(transformation, DatacubeMapper): - mapper_transformation = transformation - if not mapper_transformation: - # There is no grid mapping - pass - else: - # Slice on the two grid axes - grid_axes = mapper_transformation._mapped_axes - - # Handle first grid axis - polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes) - - for poly in polytopes_on_axis: - lower, upper, slice_axis_idx = poly.extents(grid_axes[0]) - - first_ax_vals = mapper_transformation.first_axis_vals() - - new_lower, new_upper = transform_upper_lower(grid_axes[0], lower, upper, datacube_axes) - found_vals = [v for v in first_ax_vals if new_lower <= v <= new_upper] - - if len(found_vals) == 0: - continue - - # slice polytope along each value on child and keep resulting polytopes in memory - sliced_polys = [] - for val in found_vals: - ax = datacube_axes[grid_axes[0]] - if not isinstance(ax, UnsliceableDatacubeAxis): - fval = ax.to_float(val) - # slice polytope along the value and add sliced polytope to list of polytopes in memory - sliced_poly = slice(poly, grid_axes[0], fval, slice_axis_idx) - sliced_polys.append(sliced_poly) - # decide if axis should be compressed or not according to polytope - # NOTE: actually the first grid axis will never be compressed - axis_compressed = _axes_compressed().get(grid_axes[0], False) - - # if it's not compressed, need to separate into different nodes to append to the tree - for i, found_val in enumerate(found_vals): - child_polytopes = [p for p in polytopes if p != poly] - if sliced_polys[i]: - child_polytopes.append(sliced_polys[i]) - - second_axis_vals = mapper_transformation.second_axis_vals([found_val]) - - # get second axis children through slicing - children = _slice_second_grid_axis( - grid_axes[1], child_polytopes, datacube_axes, datacube_transformations, second_axis_vals) - # If this node used to have children but now has none due to 
filtering, skip it. - if not children: - continue - if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): - found_val = [str(found_val)] - - # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here - # and instead keep/cache the value to merge with the node from before?? - - qube_node = Qube.make(key=grid_axes[0], - values=QEnum([found_val]), - metadata={}, - children=children) - result.append(qube_node) - - for i, child in enumerate(q.children): - # find polytopes which are defined on axis child.key - polytopes_on_axis = find_polytopes_on_axis(child.key, polytopes) - - # here now first change the values in the polytopes on the axis to reflect the axis type - - for poly in polytopes_on_axis: - # find extents of polytope on child.key - lower, upper, slice_axis_idx = poly.extents(child.key) - - # find values on child that are within extents - # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation - modified_vals = change_datacube_val_types(child, datacube_transformations) - - # here use the axis to transform lower and upper to right type too - new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube_axes) - found_vals = [v for v in modified_vals if new_lower <= v <= new_upper] - - if len(found_vals) == 0: - continue - - # slice polytope along each value on child and keep resulting polytopes in memory - sliced_polys = [] - for val in found_vals: - ax = datacube_axes[child.key] - if not isinstance(ax, UnsliceableDatacubeAxis): - fval = ax.to_float(val) - # slice polytope along the value and add sliced polytope to list of polytopes in memory - sliced_poly = slice(poly, child.key, fval, slice_axis_idx) - sliced_polys.append(sliced_poly) - # decide if axis should be compressed or not according to polytope - axis_compressed = _axes_compressed().get(child.key, False) - # if it's not compressed, need to separate into 
different nodes to append to the tree - if not axis_compressed and len(found_vals) > 1: - for i, found_val in enumerate(found_vals): - child_polytopes = [p for p in polytopes if p != poly] - if sliced_polys[i]: - child_polytopes.append(sliced_polys[i]) - children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) - # If this node used to have children but now has none due to filtering, skip it. - if child.children and not children: - continue - if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): - found_val = [str(found_val)] - - # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here - # and instead keep/cache the value to merge with the node from before?? - - qube_node = Qube.make(key=child.key, - values=QEnum(found_val), - metadata=child.metadata, - children=children) - result.append(qube_node) - else: - # if it's compressed, then can add all found values in a single node - child_polytopes = [p for p in polytopes if p != poly] - child_polytopes.extend([sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) - # create children - children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) - # If this node used to have children but now has none due to filtering, skip it. 
- if child.children and not children: - continue - - new_found_vals = [] - for found_val in found_vals: - if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): - new_found_vals.append(str(found_val)) - else: - new_found_vals.append(found_val) - - result.extend([Qube.make( - key=child.key, - values=QEnum(new_found_vals), - metadata=child.metadata, - children=children - )]) - - return result - - # change the polytope point types here - for polytope in polytopes_to_slice: - for axis in polytope._axes: - change_poly_axis_type(axis, [polytope], datacube_axes) - - return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) - - -def actual_slice(q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations): - # for p in polytopes_to_slice: - # if isinstance(p, Product): - # for poly in p.polytope(): - # self._unique_continuous_points(poly, datacube) - # else: - # self._unique_continuous_points(p, datacube) - - groups, input_axes = group(polytopes_to_slice) - # datacube.validate(input_axes) - # request = TensorIndexTree() - combinations = tensor_product(groups) - - sub_trees = [] - - # NOTE: could optimise here if we know combinations will always be for one request. - # Then we do not need to create a new index tree and merge it to request, but can just - # directly work on request and return it... 
- - for c in combinations: - # r = TensorIndexTree() - new_c = [] - for combi in c: - if isinstance(combi, list): - new_c.extend(combi) - else: - new_c.append(combi) - # NOTE TODO: here some of the polys in new_c can be a Product shape instead of a ConvexPolytope - # -> need to go through the polytopes in new_c and replace the Products with their sub-ConvexPolytopes - final_polys = [] - for poly in new_c: - if isinstance(poly, Product): - final_polys.extend(poly.polytope()) - else: - final_polys.append(poly) - - # Get the sliced Qube for each combi - r = _actual_slice(q, final_polys, datacube_axes, datacube_transformations) - sub_trees.append(r) - - final_tree = sub_trees[0] - - for sub_tree in sub_trees[1:]: - union(final_tree, sub_tree) - return final_tree diff --git a/polytope_feature/engine/qubed_polytope_intersection.py b/polytope_feature/engine/qubed_polytope_intersection.py deleted file mode 100644 index 4f8b1a7a4..000000000 --- a/polytope_feature/engine/qubed_polytope_intersection.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import List - -from ..datacube.tensor_index_tree import TensorIndexTree -from ..shapes import ConvexPolytope -from ..utility.combinatorics import group, tensor_product -from .engine import Engine -from ..utility.list_tools import unique -from qubed import Qube - - -class QubedSlicing(Engine): - def __init__(self): - self.datacube = ?? - - def create_fake_datacube_mappers(self): - # TODO - self.datacube_mappers = ?? 
- pass - - def create_request_polys(self, polytopes): - for p in polytopes: - self._unique_continuous_points(p) - - groups, input_axes = group(polytopes) - combinations = tensor_product(groups) - return combinations - - def _unique_continuous_points(self, p: ConvexPolytope): - for i, ax in enumerate(p._axes): - mapper = self.datacube_mappers.get(ax, None) - for j, val in enumerate(p.points): - p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) - # Remove duplicate points - unique(p.points) - - # def build_tree(self, combination): - - # unsliced_polytopes = set(combination) - - # def _build_tree(self, q: Qube): - # for child in q.children: - # # Find the axis object - # ax = self.datacube_mappers[child.key] - # self.build_branch() - - def extract(self, datacube, polytopes: List[ConvexPolytope]): - combinations = self.create_request_polys(polytopes) - - request = Qube.empty() - - for c in combinations: - new_c = [] - for combi in c: - if isinstance(combi, list): - new_c.extend(combi) - else: - new_c.append(combi) - - # r = build_tree # TODO - # pass - # r.set - - # TODO: replace all the TensorIndexTrees with Qube trees - - # request = TensorIndexTree() - - # for c in combinations: - # r = TensorIndexTree() - # new_c = [] - # for combi in c: - # if isinstance(combi, list): - # new_c.extend(combi) - # else: - # new_c.append(combi) - # r["unsliced_polytopes"] = set(new_c) - # current_nodes = [r] - # for ax in datacube.axes.values(): - # next_nodes = [] - # interm_next_nodes = [] - # for node in current_nodes: - # self._build_branch(ax, node, datacube, interm_next_nodes) - # next_nodes.extend(interm_next_nodes) - # interm_next_nodes = [] - # current_nodes = next_nodes - - # request.merge(r) - # return request diff --git a/tests/test_qubed_extraction.py b/tests/test_qubed_extraction.py index bca3b50ad..ef8e19d17 100644 --- a/tests/test_qubed_extraction.py +++ b/tests/test_qubed_extraction.py @@ -1,27 +1,46 @@ -from qubed import Qube -import requests -from 
polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis -from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice -from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta -import pandas as pd -from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper +# from qubed import Qube +# import requests +# from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis +# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +# from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta +# import pandas as pd +# from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper -from polytope_feature.shapes import ConvexPolytope +# from polytope_feature.shapes import ConvexPolytope -fdb_tree = Qube.from_json(requests.get( - "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) +# fdb_tree = Qube.from_json(requests.get( +# "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json").json()) -# fdb_tree = fdb_tree.remove_by_key(["year"]).remove_by_key(["month"]) +# # fdb_tree = fdb_tree.remove_by_key(["year"]).remove_by_key(["month"]) -fdb_tree.print() +# fdb_tree.print() -print(fdb_tree.axes().keys()) +# print(fdb_tree.axes().keys()) +# # combi_polytopes = [ +# # ConvexPolytope(["param"], [["168"]]), +# # ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), +# # ConvexPolytope(["resolution"], [["high"]]), +# # ConvexPolytope(["type"], 
[["fc"]]), +# # ConvexPolytope(["model"], [['ifs-nemo']]), +# # ConvexPolytope(["stream"], [["clte"]]), +# # ConvexPolytope(["realization"], ["1"]), +# # ConvexPolytope(["expver"], [['0001']]), +# # ConvexPolytope(["experiment"], [['ssp3-7.0']]), +# # ConvexPolytope(["generation"], [["1"]]), +# # ConvexPolytope(["levtype"], [["sfc"]]), +# # ConvexPolytope(["activity"], [["scenariomip"]]), +# # ConvexPolytope(["dataset"], [["climate-dt"]]), +# # ConvexPolytope(["class"], [["d1"]]), +# # ConvexPolytope(["date"], [[pd.Timestamp("20210728")], [pd.Timestamp("20210729")]]) +# # ] + +# # TODO: add lat/lon polygon # combi_polytopes = [ -# ConvexPolytope(["param"], [["168"]]), +# ConvexPolytope(["param"], [["164"]]), # ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), # ConvexPolytope(["resolution"], [["high"]]), # ConvexPolytope(["type"], [["fc"]]), @@ -35,80 +54,61 @@ # ConvexPolytope(["activity"], [["scenariomip"]]), # ConvexPolytope(["dataset"], [["climate-dt"]]), # ConvexPolytope(["class"], [["d1"]]), -# ConvexPolytope(["date"], [[pd.Timestamp("20210728")], [pd.Timestamp("20210729")]]) +# ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), +# ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) # ] -# TODO: add lat/lon polygon -combi_polytopes = [ - ConvexPolytope(["param"], [["164"]]), - ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]), - ConvexPolytope(["resolution"], [["high"]]), - ConvexPolytope(["type"], [["fc"]]), - ConvexPolytope(["model"], [['ifs-nemo']]), - ConvexPolytope(["stream"], [["clte"]]), - ConvexPolytope(["realization"], ["1"]), - ConvexPolytope(["expver"], [['0001']]), - ConvexPolytope(["experiment"], [['ssp3-7.0']]), - ConvexPolytope(["generation"], [["1"]]), - ConvexPolytope(["levtype"], [["sfc"]]), - ConvexPolytope(["activity"], [["scenariomip"]]), - ConvexPolytope(["dataset"], 
[["climate-dt"]]), - ConvexPolytope(["class"], [["d1"]]), - ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) -] - -# TODO: add lat and lon axes -datacube_axes = {"param": UnsliceableDatacubeAxis(), - "time": PandasTimedeltaDatacubeAxis(), - "resolution": UnsliceableDatacubeAxis(), - "type": UnsliceableDatacubeAxis(), - "model": UnsliceableDatacubeAxis(), - "stream": UnsliceableDatacubeAxis(), - "realization": UnsliceableDatacubeAxis(), - "expver": UnsliceableDatacubeAxis(), - "experiment": UnsliceableDatacubeAxis(), - "generation": UnsliceableDatacubeAxis(), - "levtype": UnsliceableDatacubeAxis(), - "activity": UnsliceableDatacubeAxis(), - "dataset": UnsliceableDatacubeAxis(), - "class": UnsliceableDatacubeAxis(), - "date": PandasTimestampDatacubeAxis(), - "latitude": FloatDatacubeAxis(), - "longitude": FloatDatacubeAxis()} - -time_val = pd.Timedelta(hours=0, minutes=0) -date_val = pd.Timestamp("20300101T000000") - - -# TODO: add grid axis transformation -datacube_transformations = { - "time": TypeChangeStrToTimedelta("time", time_val), - "date": TypeChangeStrToTimestamp("date", date_val), - "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) -} - - -sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) - - -print("THE FINAL RESULT IS") -print(sliced_tree) - -# TODO: treat the transformations to talk to the qubed tree, maybe do it - -# TODO: start iterating fdb_tree and creating a new request tree - -# print(fdb_tree.) 
- - -# Select("step", [0]), -# Select("levtype", ["sfc"]), -# Select("date", [pd.Timestamp("20231102T000000")]), -# Select("domain", ["g"]), -# Select("expver", ["0001"]), -# Select("param", ["167"]), -# Select("class", ["od"]), -# Select("stream", ["oper"]), -# Select("type", ["fc"]), -# Box(["latitude", "longitude"], [0, 0], [80, 80]), +# # TODO: add lat and lon axes +# datacube_axes = {"param": UnsliceableDatacubeAxis(), +# "time": PandasTimedeltaDatacubeAxis(), +# "resolution": UnsliceableDatacubeAxis(), +# "type": UnsliceableDatacubeAxis(), +# "model": UnsliceableDatacubeAxis(), +# "stream": UnsliceableDatacubeAxis(), +# "realization": UnsliceableDatacubeAxis(), +# "expver": UnsliceableDatacubeAxis(), +# "experiment": UnsliceableDatacubeAxis(), +# "generation": UnsliceableDatacubeAxis(), +# "levtype": UnsliceableDatacubeAxis(), +# "activity": UnsliceableDatacubeAxis(), +# "dataset": UnsliceableDatacubeAxis(), +# "class": UnsliceableDatacubeAxis(), +# "date": PandasTimestampDatacubeAxis(), +# "latitude": FloatDatacubeAxis(), +# "longitude": FloatDatacubeAxis()} + +# time_val = pd.Timedelta(hours=0, minutes=0) +# date_val = pd.Timestamp("20300101T000000") + + +# # TODO: add grid axis transformation +# datacube_transformations = { +# "time": TypeChangeStrToTimedelta("time", time_val), +# "date": TypeChangeStrToTimestamp("date", date_val), +# "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) +# } + + +# sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) + + +# print("THE FINAL RESULT IS") +# print(sliced_tree) + +# # TODO: treat the transformations to talk to the qubed tree, maybe do it + +# # TODO: start iterating fdb_tree and creating a new request tree + +# # print(fdb_tree.) 
+ + +# # Select("step", [0]), +# # Select("levtype", ["sfc"]), +# # Select("date", [pd.Timestamp("20231102T000000")]), +# # Select("domain", ["g"]), +# # Select("expver", ["0001"]), +# # Select("param", ["167"]), +# # Select("class", ["od"]), +# # Select("stream", ["oper"]), +# # Select("type", ["fc"]), +# # Box(["latitude", "longitude"], [0, 0], [80, 80]), diff --git a/tests/test_qubed_extraction_service.py b/tests/test_qubed_extraction_service.py index 2781c3162..ddf235a96 100644 --- a/tests/test_qubed_extraction_service.py +++ b/tests/test_qubed_extraction_service.py @@ -7,7 +7,7 @@ from qubed import Qube import requests from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis -from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta import pandas as pd from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import NestedHealpixGridMapper From 1b4305a2b5cd8ff187dc7b5c5b43ed585911a0e4 Mon Sep 17 00:00:00 2001 From: mathleur Date: Fri, 25 Apr 2025 11:10:33 +0200 Subject: [PATCH 23/28] start refactoring --- polytope_feature/engine/engine.py | 48 +++++++++++++- polytope_feature/engine/hullslicer.py | 49 +------------- polytope_feature/engine/qubed_slicer.py | 85 ++++++------------------- tests/test_qubed_extraction_engine.py | 8 ++- 4 files changed, 74 insertions(+), 116 deletions(-) diff --git a/polytope_feature/engine/engine.py b/polytope_feature/engine/engine.py index c714db0b2..d44facc31 100644 --- a/polytope_feature/engine/engine.py +++ b/polytope_feature/engine/engine.py @@ -2,7 +2,10 @@ from ..datacube.backends.datacube import Datacube from ..datacube.tensor_index_tree import 
TensorIndexTree -from ..shapes import ConvexPolytope +from ..shapes import ConvexPolytope, Product + +from ..datacube.datacube_axis import UnsliceableDatacubeAxis +from ..utility.list_tools import unique class Engine: @@ -17,3 +20,46 @@ def default(): from .hullslicer import HullSlicer return HullSlicer() + + def _unique_continuous_points(self, p: ConvexPolytope, datacube: Datacube): + for i, ax in enumerate(p._axes): + mapper = datacube.get_mapper(ax) + if self.ax_is_unsliceable.get(ax, None) is None: + self.ax_is_unsliceable[ax] = isinstance(mapper, UnsliceableDatacubeAxis) + if self.ax_is_unsliceable[ax]: + break + for j, val in enumerate(p.points): + p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) + # Remove duplicate points + unique(p.points) + + def pre_process_polytopes(self, datacube, polytopes): + for p in polytopes: + if isinstance(p, Product): + for poly in p.polytope(): + self._unique_continuous_points(poly, datacube) + else: + self._unique_continuous_points(p, datacube) + + def find_compressed_axes(self, datacube, polytopes): + # First determine compressable axes from input polytopes + compressable_axes = [] + for polytope in polytopes: + if polytope.is_orthogonal: + for ax in polytope.axes(): + compressable_axes.append(ax) + # Cross check this list with list of compressable axis from datacube + # (should not include any merged or coupled axes) + for compressed_axis in compressable_axes: + if compressed_axis in datacube.compressed_axes: + self.compressed_axes.append(compressed_axis) + # add the last axis of the grid always (longitude) as a compressed axis + k, last_value = _, datacube.axes[k] = datacube.axes.popitem() + self.compressed_axes.append(k) + + def remove_compressed_axis_in_union(self, polytopes): + for p in polytopes: + if p.is_in_union: + for axis in p.axes(): + if axis == self.compressed_axes[-1]: + self.compressed_axes.remove(axis) diff --git a/polytope_feature/engine/hullslicer.py b/polytope_feature/engine/hullslicer.py 
index 80feffd6d..64b50b640 100644 --- a/polytope_feature/engine/hullslicer.py +++ b/polytope_feature/engine/hullslicer.py @@ -6,13 +6,12 @@ import scipy.spatial from ..datacube.backends.datacube import Datacube -from ..datacube.datacube_axis import UnsliceableDatacubeAxis from ..datacube.tensor_index_tree import TensorIndexTree from ..shapes import ConvexPolytope, Product from ..utility.combinatorics import group, tensor_product from ..utility.exceptions import UnsliceableShapeError from ..utility.geometry import lerp -from ..utility.list_tools import argmax, argmin, unique +from ..utility.list_tools import argmax, argmin from .engine import Engine @@ -25,18 +24,6 @@ def __init__(self): self.remapped_vals = {} self.compressed_axes = [] - def _unique_continuous_points(self, p: ConvexPolytope, datacube: Datacube): - for i, ax in enumerate(p._axes): - mapper = datacube.get_mapper(ax) - if self.ax_is_unsliceable.get(ax, None) is None: - self.ax_is_unsliceable[ax] = isinstance(mapper, UnsliceableDatacubeAxis) - if self.ax_is_unsliceable[ax]: - break - for j, val in enumerate(p.points): - p.points[j][i] = mapper.to_float(mapper.parse(p.points[j][i])) - # Remove duplicate points - unique(p.points) - def _build_unsliceable_child(self, polytope, ax, node, datacube, lowers, next_nodes, slice_axis_idx): if not polytope.is_flat: raise UnsliceableShapeError(ax) @@ -139,9 +126,6 @@ def _build_branch(self, ax, node, datacube, next_nodes): self._build_unsliceable_child(polytope, ax, node, datacube, [lower], next_nodes, slice_axis_idx) else: values = self.find_values_between(polytope, ax, node, datacube, lower, upper) - # print(ax.name) - # print((lower, upper)) - # print(values) # NOTE: need to only remove the branches if the values are empty, # but only if there are no other possible children left in the tree that # we can append and if somehow this happens before and we need to remove, then what do we do?? 
@@ -183,29 +167,6 @@ def _build_branch(self, ax, node, datacube, next_nodes): del node["unsliced_polytopes"] - def find_compressed_axes(self, datacube, polytopes): - # First determine compressable axes from input polytopes - compressable_axes = [] - for polytope in polytopes: - if polytope.is_orthogonal: - for ax in polytope.axes(): - compressable_axes.append(ax) - # Cross check this list with list of compressable axis from datacube - # (should not include any merged or coupled axes) - for compressed_axis in compressable_axes: - if compressed_axis in datacube.compressed_axes: - self.compressed_axes.append(compressed_axis) - # add the last axis of the grid always (longitude) as a compressed axis - k, last_value = _, datacube.axes[k] = datacube.axes.popitem() - self.compressed_axes.append(k) - - def remove_compressed_axis_in_union(self, polytopes): - for p in polytopes: - if p.is_in_union: - for axis in p.axes(): - if axis == self.compressed_axes[-1]: - self.compressed_axes.remove(axis) - def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): # Determine list of axes to compress self.find_compressed_axes(datacube, polytopes) @@ -214,12 +175,7 @@ def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): self.remove_compressed_axis_in_union(polytopes) # Convert the polytope points to float type to support triangulation and interpolation - for p in polytopes: - if isinstance(p, Product): - for poly in p.polytope(): - self._unique_continuous_points(poly, datacube) - else: - self._unique_continuous_points(p, datacube) + self.pre_process_polytopes(datacube, polytopes) groups, input_axes = group(polytopes) datacube.validate(input_axes) @@ -246,7 +202,6 @@ def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): final_polys.extend(poly.polytope()) else: final_polys.append(poly) - # r["unsliced_polytopes"] = set(new_c) r["unsliced_polytopes"] = set(final_polys) current_nodes = [r] for ax in datacube.axes.values(): diff --git 
a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py index fe42b01a3..b0b1a392e 100644 --- a/polytope_feature/engine/qubed_slicer.py +++ b/polytope_feature/engine/qubed_slicer.py @@ -25,7 +25,7 @@ def __init__(self): self.ax_is_unsliceable = {} self.compressed_axes = [] - def _actual_slice(self, q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations) -> 'Qube': + def _actual_slice(self, q: Qube, polytopes_to_slice, datacube, datacube_transformations) -> 'Qube': def find_polytopes_on_axis(axis_name, polytopes): polytopes_on_axis = [] @@ -34,23 +34,6 @@ def find_polytopes_on_axis(axis_name, polytopes): polytopes_on_axis.append(poly) return polytopes_on_axis - def change_poly_axis_type(axis_name, polytopes, datacube_axes): - axis = datacube_axes[axis_name] - # loop through the polytopes and change each polytopes's values according to axis - if isinstance(axis, UnsliceableDatacubeAxis): - return - - for poly in polytopes: - i = 0 - for k, ax_name in enumerate(poly._axes): - if ax_name == axis_name: - i = k - for j, val in enumerate(poly.points): - poly.points[j][i] = axis.to_float(axis.parse(poly.points[j][i])) - - # def _axes_compressed(): - # return {} - def change_datacube_val_types(child: Qube, datacube_transformations): axis_name = child.key transformation = datacube_transformations.get(axis_name, None) @@ -64,8 +47,8 @@ def change_datacube_val_types(child: Qube, datacube_transformations): return new_vals - def transform_upper_lower(axis_name, lower, upper, datacube_axes): - ax = datacube_axes[axis_name] + def transform_upper_lower(axis_name, lower, upper, datacube): + ax = datacube._axes[axis_name] if isinstance(ax, UnsliceableDatacubeAxis): return (lower, upper) tol = ax.tol @@ -74,14 +57,14 @@ def transform_upper_lower(axis_name, lower, upper, datacube_axes): return (lower, upper) - def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transformations, second_axis_vals) -> list[Qube]: + def 
_slice_second_grid_axis(axis_name, polytopes, datacube, datacube_transformations, second_axis_vals) -> list[Qube]: result = [] polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes) for poly in polytopes_on_axis: lower, upper, slice_axis_idx = poly.extents(axis_name) - new_lower, new_upper = transform_upper_lower(axis_name, lower, upper, datacube_axes) + new_lower, new_upper = transform_upper_lower(axis_name, lower, upper, datacube) found_vals = [v for v in second_axis_vals if new_lower <= v <= new_upper] if len(found_vals) == 0: @@ -90,7 +73,7 @@ def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transf # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: - ax = datacube_axes[axis_name] + ax = datacube._axes[axis_name] if not isinstance(ax, UnsliceableDatacubeAxis): fval = ax.to_float(val) # slice polytope along the value and add sliced polytope to list of polytopes in memory @@ -118,7 +101,7 @@ def _slice_second_grid_axis(axis_name, polytopes, datacube_axes, datacube_transf )]) return result - def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[Qube]: + def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube]: result = [] if len(q.children) == 0: @@ -142,7 +125,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ first_ax_vals = mapper_transformation.first_axis_vals() - new_lower, new_upper = transform_upper_lower(grid_axes[0], lower, upper, datacube_axes) + new_lower, new_upper = transform_upper_lower(grid_axes[0], lower, upper, datacube) found_vals = [v for v in first_ax_vals if new_lower <= v <= new_upper] if len(found_vals) == 0: @@ -151,7 +134,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: - ax = 
datacube_axes[grid_axes[0]] + ax = datacube._axes[grid_axes[0]] if not isinstance(ax, UnsliceableDatacubeAxis): fval = ax.to_float(val) # slice polytope along the value and add sliced polytope to list of polytopes in memory @@ -159,7 +142,6 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope # NOTE: actually the first grid axis will never be compressed - # axis_compressed = self.compressed_axes.get(grid_axes[0], False) axis_compressed = (grid_axes[0] in self.compressed_axes) # if it's not compressed, need to separate into different nodes to append to the tree @@ -172,7 +154,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # get second axis children through slicing children = _slice_second_grid_axis( - grid_axes[1], child_polytopes, datacube_axes, datacube_transformations, second_axis_vals) + grid_axes[1], child_polytopes, datacube, datacube_transformations, second_axis_vals) # If this node used to have children but now has none due to filtering, skip it. 
if not children: continue @@ -203,7 +185,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ modified_vals = change_datacube_val_types(child, datacube_transformations) # here use the axis to transform lower and upper to right type too - new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube_axes) + new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube) found_vals = [v for v in modified_vals if new_lower <= v <= new_upper] if len(found_vals) == 0: @@ -212,14 +194,13 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: - ax = datacube_axes[child.key] + ax = datacube._axes[child.key] if not isinstance(ax, UnsliceableDatacubeAxis): fval = ax.to_float(val) # slice polytope along the value and add sliced polytope to list of polytopes in memory sliced_poly = slice(poly, child.key, fval, slice_axis_idx) sliced_polys.append(sliced_poly) # decide if axis should be compressed or not according to polytope - # axis_compressed = self.compressed_axes.get(child.key, False) axis_compressed = (child.key in self.compressed_axes) # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: @@ -227,7 +208,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ child_polytopes = [p for p in polytopes if p != poly] if sliced_polys[i]: child_polytopes.append(sliced_polys[i]) - children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) + children = _slice(child, child_polytopes, datacube, datacube_transformations) # If this node used to have children but now has none due to filtering, skip it. 
if child.children and not children: continue @@ -248,7 +229,7 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ child_polytopes.extend( [sliced_poly_ for sliced_poly_ in sliced_polys if sliced_poly_ is not None]) # create children - children = _slice(child, child_polytopes, datacube_axes, datacube_transformations) + children = _slice(child, child_polytopes, datacube, datacube_transformations) # If this node used to have children but now has none due to filtering, skip it. if child.children and not children: continue @@ -269,14 +250,9 @@ def _slice(q: Qube, polytopes, datacube_axes, datacube_transformations) -> list[ return result - # change the polytope point types here - for polytope in polytopes_to_slice: - for axis in polytope._axes: - change_poly_axis_type(axis, [polytope], datacube_axes) - - return Qube.root_node(_slice(q, polytopes_to_slice, datacube_axes, datacube_transformations)) + return Qube.root_node(_slice(q, polytopes_to_slice, datacube, datacube_transformations)) - def actual_slice(self, q: Qube, polytopes_to_slice, datacube_axes, datacube_transformations): + def actual_slice(self, q: Qube, polytopes_to_slice, datacube, datacube_transformations): groups, input_axes = group(polytopes_to_slice) combinations = tensor_product(groups) @@ -302,7 +278,7 @@ def actual_slice(self, q: Qube, polytopes_to_slice, datacube_axes, datacube_tran final_polys.append(poly) # Get the sliced Qube for each combi - r = self._actual_slice(q, final_polys, datacube_axes, datacube_transformations) + r = self._actual_slice(q, final_polys, datacube, datacube_transformations) sub_trees.append(r) final_tree = sub_trees[0] @@ -311,31 +287,10 @@ def actual_slice(self, q: Qube, polytopes_to_slice, datacube_axes, datacube_tran union(final_tree, sub_tree) return final_tree - def find_compressed_axes(self, datacube, polytopes): - # First determine compressable axes from input polytopes - compressable_axes = [] - for polytope in polytopes: - if 
polytope.is_orthogonal: - for ax in polytope.axes(): - compressable_axes.append(ax) - # Cross check this list with list of compressable axis from datacube - # (should not include any merged or coupled axes) - for compressed_axis in compressable_axes: - if compressed_axis in datacube.compressed_axes: - self.compressed_axes.append(compressed_axis) - # add the last axis of the grid always (longitude) as a compressed axis - k, last_value = _, datacube.axes[k] = datacube.axes.popitem() - self.compressed_axes.append(k) - - def remove_compressed_axis_in_union(self, polytopes): - for p in polytopes: - if p.is_in_union: - for axis in p.axes(): - if axis == self.compressed_axes[-1]: - self.compressed_axes.remove(axis) - def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): self.find_compressed_axes(datacube, polytopes) + self.pre_process_polytopes(datacube, polytopes) assert isinstance(datacube, QubedDatacube) - tree = self.actual_slice(datacube.q, polytopes, datacube.datacube_axes, datacube.datacube_transformations) + tree = self.actual_slice(datacube.q, polytopes, datacube, + datacube.datacube_transformations) return tree diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index 991688e84..4b39a6457 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -7,7 +7,7 @@ from qubed import Qube import requests from polytope_feature.datacube.datacube_axis import PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, FloatDatacubeAxis -from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice +# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import TypeChangeStrToTimestamp, TypeChangeStrToTimedelta import pandas as pd from polytope_feature.datacube.transformations.datacube_mappers.mapper_types.healpix_nested import 
NestedHealpixGridMapper @@ -153,7 +153,7 @@ def find_relevant_subcube_from_request(request, qube_url): ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]])) + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])) qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) slicer = QubedSlicer() @@ -166,6 +166,8 @@ def find_relevant_subcube_from_request(request, qube_url): result = self_API.retrieve(request) time2 = time.time() +print(result) + print("TIME EXTRACTING USING QUBED") print(time2 - time1) @@ -231,7 +233,7 @@ def find_relevant_subcube_from_request(request, qube_url): ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]])) + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])) time3 = time.time() result = self_API.retrieve(request) From d3548dfdca0633a23491aad828f8cabe7e5aa3fe Mon Sep 17 00:00:00 2001 From: mathleur Date: Mon, 28 Apr 2025 11:42:42 +0200 Subject: [PATCH 24/28] start incorporating transformations inside qubed --- polytope_feature/datacube/backends/qubed.py | 18 +++++++++++++++- polytope_feature/datacube/datacube_axis.py | 18 ++++++++++++++++ polytope_feature/engine/qubed_slicer.py | 24 ++++++++++++++++----- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index cfe7b31fc..3af789c12 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -43,7 +43,7 @@ def __init__( self.fdb_coordinates = {} # TODO: we instead now have a list of axes with the actual axes types... 
- + # TODO: here use the qubed to find all axes names and then get the values from the first val of the qubed and then apply transformations to get the actual right axis type... for axis_name in datacube_axes: axis = datacube_axes[axis_name] self.fdb_coordinates[axis_name] = [axis.type] @@ -70,6 +70,22 @@ def __init__( val = self._axes[name].type self._check_and_add_axes(options, name, val) + def get_indices(self, path_node, axis, lower, upper, method=None): + """ + Given a path to a subset of the datacube, return the discrete indexes which exist between + two non-discrete values (lower, upper) for a particular axis (given by label) + If lower and upper are equal, returns the index which exactly matches that value (if it exists) + e.g. returns integer discrete points between two floats + """ + # path = self.fit_path(path) + indexes = axis.find_indexes(path_node, self) + + idx_between = axis.find_indices_between(indexes, lower, upper, self, method) + + logging.debug(f"For axis {axis.name} between {lower} and {upper}, found indices {idx_between}") + + return idx_between + def get(self, requests, context=None): if context is None: context = {} diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index 8840a8a2f..28db4d117 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -78,6 +78,24 @@ def find_indexes(self, path, datacube): indexes = transformation.find_modified_indexes(indexes, path, datacube, self) return indexes + def find_standard_indexes_node(self, path, datacube): + # TODO: change to use the node instead of a path when we have a Qubed datacube backend + unmapped_path = {} + path_copy = deepcopy(path) + print(path) + for key in path_copy: + axis = datacube._axes[key] + (path, unmapped_path) = axis.unmap_to_datacube(path, unmapped_path) + subarray = datacube.select(path, unmapped_path) + return datacube.datacube_natural_indexes(self, subarray) + + def 
find_indexes_node(self, path_node, datacube): + indexes = self.find_standard_indexes_node(path_node, datacube) + for transformation in self.transformations[::-1]: + # TODO: change to use the node instead of a path when we have a Qubed dataucbe backend + indexes = transformation.find_modified_indexes(indexes, path, datacube, self) + return indexes + def offset(self, value): offset = 0 for transformation in self.transformations[::-1]: diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py index b0b1a392e..70b40fe68 100644 --- a/polytope_feature/engine/qubed_slicer.py +++ b/polytope_feature/engine/qubed_slicer.py @@ -14,17 +14,26 @@ from ..datacube.backends.datacube import Datacube -# TODO: create a class for qubed slicer -# TODO: turn actual_slice into extract, which only takes in a Datacube instead of a Qube + datacube_axes + datacube_transformations - -# TODO: create a Qube datacube, which takes in a Qube and exposes the Qube + datacube_axes + datacube_transformations - class QubedSlicer(Engine): def __init__(self): self.ax_is_unsliceable = {} self.compressed_axes = [] + def find_datacube_vals(): + # TODO + pass + + def find_values_between(self, polytope, ax, node, datacube, lower, upper): + # TODO + tol = ax.tol + lower = ax.from_float(lower - tol) + upper = ax.from_float(upper + tol) + + # values = datacube.get_indices(flattened, ax, lower, upper, method) + # return values + pass + def _actual_slice(self, q: Qube, polytopes_to_slice, datacube, datacube_transformations) -> 'Qube': def find_polytopes_on_axis(axis_name, polytopes): @@ -38,6 +47,9 @@ def change_datacube_val_types(child: Qube, datacube_transformations): axis_name = child.key transformation = datacube_transformations.get(axis_name, None) child_vals = child.values + + # TODO: use axis.find_indexes_between to find the right child_vals + # TODO: actually, build same as find_values_between(self, polytope, ax, node, datacube, lower, upper) by writing new functions in 
qubed backend new_vals = [] for val in child_vals: if transformation: @@ -161,6 +173,8 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): found_val = [str(found_val)] + # TODO: remap the found_val using self.remap_values like in the hullslicer + # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here # and instead keep/cache the value to merge with the node from before?? From 05948acc269377bff2f8f198479b64cbf36919a6 Mon Sep 17 00:00:00 2001 From: mathleur Date: Mon, 28 Apr 2025 17:52:13 +0200 Subject: [PATCH 25/28] add everything except grid transformations --- polytope_feature/datacube/backends/qubed.py | 13 +++++-- polytope_feature/datacube/datacube_axis.py | 25 ++++++++------ .../datacube_reverse/datacube_reverse.py | 6 ++++ .../datacube_type_change.py | 16 ++++++--- polytope_feature/engine/qubed_slicer.py | 34 ++++++++++++------- tests/test_qubed_extraction_engine.py | 13 ++++--- 6 files changed, 70 insertions(+), 37 deletions(-) diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index 3af789c12..b1d3721e8 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -6,6 +6,7 @@ from ...utility.geometry import nearest_pt import pygribjump as pygj from qubed.value_types import QEnum +import numpy as np from .datacube import Datacube, TensorIndexTree @@ -46,7 +47,7 @@ def __init__( # TODO: here use the qubed to find all axes names and then get the values from the first val of the qubed and then apply transformations to get the actual right axis type... 
for axis_name in datacube_axes: axis = datacube_axes[axis_name] - self.fdb_coordinates[axis_name] = [axis.type] + self.fdb_coordinates[axis_name] = [axis.type_eg] self.fdb_coordinates["values"] = [] for name, values in self.fdb_coordinates.items(): @@ -67,9 +68,15 @@ def __init__( if opt.axis_name == name: options = opt - val = self._axes[name].type + val = self._axes[name].type_eg self._check_and_add_axes(options, name, val) + def datacube_natural_indexes(self, qube_node): + if qube_node is not None: + return np.asarray(list(qube_node.values)) + else: + return [] + def get_indices(self, path_node, axis, lower, upper, method=None): """ Given a path to a subset of the datacube, return the discrete indexes which exist between @@ -78,7 +85,7 @@ def get_indices(self, path_node, axis, lower, upper, method=None): e.g. returns integer discrete points between two floats """ # path = self.fit_path(path) - indexes = axis.find_indexes(path_node, self) + indexes = axis.find_indexes_node(path_node, self) idx_between = axis.find_indices_between(indexes, lower, upper, self, method) diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index 28db4d117..87ac79b21 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -78,22 +78,20 @@ def find_indexes(self, path, datacube): indexes = transformation.find_modified_indexes(indexes, path, datacube, self) return indexes - def find_standard_indexes_node(self, path, datacube): - # TODO: change to use the node instead of a path when we have a Qubed datacube backend - unmapped_path = {} - path_copy = deepcopy(path) - print(path) - for key in path_copy: - axis = datacube._axes[key] - (path, unmapped_path) = axis.unmap_to_datacube(path, unmapped_path) - subarray = datacube.select(path, unmapped_path) - return datacube.datacube_natural_indexes(self, subarray) + def find_standard_indexes_node(self, path_node, datacube): + return 
datacube.datacube_natural_indexes(path_node) def find_indexes_node(self, path_node, datacube): indexes = self.find_standard_indexes_node(path_node, datacube) + # path = {self.name: tuple(path_node.values)} + if path_node: + path = {path_node.key: tuple(path_node.values)} + else: + path = {self.name: tuple()} for transformation in self.transformations[::-1]: - # TODO: change to use the node instead of a path when we have a Qubed dataucbe backend + print(indexes) indexes = transformation.find_modified_indexes(indexes, path, datacube, self) + print(indexes) return indexes def offset(self, value): @@ -214,6 +212,7 @@ def __init__(self): # TODO: Maybe here, store transformations as a dico instead self.transformations = [] self.type = 0 + self.type_eg = 0 self.can_round = True def parse(self, value: Any) -> Any: @@ -236,6 +235,7 @@ def __init__(self): self.range = None self.transformations = [] self.type = 0.0 + self.type_eg = 0.0 self.can_round = True def parse(self, value: Any) -> Any: @@ -258,6 +258,7 @@ def __init__(self): self.range = None self.transformations = [] self.type = pd.Timestamp("2000-01-01T00:00:00") + self.type_eg = "20000101T000000" self.can_round = False def parse(self, value: Any) -> Any: @@ -288,6 +289,7 @@ def __init__(self): self.range = None self.transformations = [] self.type = np.timedelta64(0, "s") + self.type_eg = "0000" self.can_round = False def parse(self, value: Any) -> Any: @@ -319,6 +321,7 @@ def __init__(self): self.transformations = [] self.can_round = False self.type = "" + self.type_eg = "" def parse(self, value: Any) -> Any: return value diff --git a/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py b/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py index baa38009d..4d3232910 100644 --- a/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py +++ b/polytope_feature/datacube/transformations/datacube_reverse/datacube_reverse.py @@ -1,5 +1,6 @@ from 
....utility.list_tools import bisect_left_cmp, bisect_right_cmp from ..datacube_transformations import DatacubeAxisTransformation +import numpy as np class DatacubeAxisReverse(DatacubeAxisTransformation): @@ -24,12 +25,17 @@ def unwanted_axes(self): def find_modified_indexes(self, indexes, path, datacube, axis): if axis.name in datacube.complete_axes: + # if isinstance(indexes, list): + # indexes.sort() + # ordered_indices = indexes + # else: ordered_indices = indexes.sort_values() else: ordered_indices = indexes return ordered_indices def find_indices_between(self, indexes, low, up, datacube, method, indexes_between_ranges, axis): + # indexes = np.asarray(indexes) indexes_between_ranges = [] if axis.name == self.name: if axis.name in datacube.complete_axes: diff --git a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py index 7ea518c05..2a96f4103 100644 --- a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py +++ b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py @@ -84,14 +84,17 @@ def __init__(self, axis_name, new_type): def transform_type(self, value): try: - return pd.Timestamp(value) + return pd.Timestamp(str(value)) except ValueError: return None def make_str(self, value): values = [] for val in value: - values.append(val.strftime("%Y%m%d")) + if isinstance(val, str): + values.append(val) + else: + values.append(val.strftime("%Y%m%d")) return tuple(values) @@ -111,9 +114,12 @@ def transform_type(self, value): def make_str(self, value): values = [] for val in value: - hours = int(val.total_seconds() // 3600) - mins = int((val.total_seconds() % 3600) // 60) - values.append(f"{hours:02d}{mins:02d}") + if isinstance(val, str): + values.append(val) + else: + hours = int(val.total_seconds() // 3600) + mins = int((val.total_seconds() % 3600) // 60) + 
values.append(f"{hours:02d}{mins:02d}") return tuple(values) diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py index 70b40fe68..ccc167066 100644 --- a/polytope_feature/engine/qubed_slicer.py +++ b/polytope_feature/engine/qubed_slicer.py @@ -25,14 +25,18 @@ def find_datacube_vals(): pass def find_values_between(self, polytope, ax, node, datacube, lower, upper): - # TODO + if isinstance(ax, UnsliceableDatacubeAxis): + return [v for v in node.values if lower <= v <= upper] + tol = ax.tol lower = ax.from_float(lower - tol) upper = ax.from_float(upper + tol) + method = polytope.method + # values = datacube.get_indices(flattened, ax, lower, upper, method) - # return values - pass + values = datacube.get_indices(node, ax, lower, upper, method) + return values def _actual_slice(self, q: Qube, polytopes_to_slice, datacube, datacube_transformations) -> 'Qube': @@ -133,12 +137,14 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes) for poly in polytopes_on_axis: + ax = datacube._axes[grid_axes[0]] lower, upper, slice_axis_idx = poly.extents(grid_axes[0]) first_ax_vals = mapper_transformation.first_axis_vals() - new_lower, new_upper = transform_upper_lower(grid_axes[0], lower, upper, datacube) - found_vals = [v for v in first_ax_vals if new_lower <= v <= new_upper] + # new_lower, new_upper = transform_upper_lower(grid_axes[0], lower, upper, datacube) + # found_vals = [v for v in first_ax_vals if new_lower <= v <= new_upper] + found_vals = self.find_values_between(poly, ax, None, datacube, lower, upper) if len(found_vals) == 0: continue @@ -146,7 +152,7 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: - ax = datacube._axes[grid_axes[0]] + # ax = datacube._axes[grid_axes[0]] if not 
isinstance(ax, UnsliceableDatacubeAxis): fval = ax.to_float(val) # slice polytope along the value and add sliced polytope to list of polytopes in memory @@ -191,16 +197,18 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] # here now first change the values in the polytopes on the axis to reflect the axis type for poly in polytopes_on_axis: + ax = datacube._axes[child.key] # find extents of polytope on child.key lower, upper, slice_axis_idx = poly.extents(child.key) - # find values on child that are within extents - # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation - modified_vals = change_datacube_val_types(child, datacube_transformations) + # # find values on child that are within extents + # # here first change the child values of the datacube ie the Qubed tree to their right type with the transformation + # modified_vals = change_datacube_val_types(child, datacube_transformations) - # here use the axis to transform lower and upper to right type too - new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube) - found_vals = [v for v in modified_vals if new_lower <= v <= new_upper] + # # here use the axis to transform lower and upper to right type too + # new_lower, new_upper = transform_upper_lower(child.key, lower, upper, datacube) + # found_vals = [v for v in modified_vals if new_lower <= v <= new_upper] + found_vals = self.find_values_between(poly, ax, child, datacube, lower, upper) if len(found_vals) == 0: continue @@ -208,7 +216,7 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: - ax = datacube._axes[child.key] + # ax = datacube._axes[child.key] if not isinstance(ax, UnsliceableDatacubeAxis): fval = ax.to_float(val) # slice polytope along the value and add sliced polytope to list of 
polytopes in memory diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index 4b39a6457..91971ad78 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -75,8 +75,9 @@ def find_relevant_subcube_from_request(request, qube_url): "dataset": UnsliceableDatacubeAxis(), "class": UnsliceableDatacubeAxis(), "date": PandasTimestampDatacubeAxis(), - "latitude": FloatDatacubeAxis(), - "longitude": FloatDatacubeAxis()} + # "latitude": FloatDatacubeAxis(), + # "longitude": FloatDatacubeAxis() + } time_val = pd.Timedelta(hours=0, minutes=0) date_val = pd.Timestamp("20300101T000000") @@ -98,8 +99,8 @@ def find_relevant_subcube_from_request(request, qube_url): # "axis_name": "date", # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], # }, - # {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, - # {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, + {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, + {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, { "axis_name": "values", "transformations": [ @@ -153,7 +154,9 @@ def find_relevant_subcube_from_request(request, qube_url): ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])) + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) + # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) + ) qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) slicer = QubedSlicer() From 931bc30707d47b8892106e0f5481976d374587a6 Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 29 Apr 2025 12:08:31 +0200 Subject: [PATCH 26/28] add support for cyclic axes on qubed 
datacube --- .../datacube/backends/datacube.py | 1 + polytope_feature/datacube/backends/qubed.py | 4 +-- polytope_feature/datacube/datacube_axis.py | 14 ++++---- polytope_feature/engine/qubed_slicer.py | 36 ++++++++++++++----- tests/test_qubed_extraction_engine.py | 4 +-- 5 files changed, 40 insertions(+), 19 deletions(-) diff --git a/polytope_feature/datacube/backends/datacube.py b/polytope_feature/datacube/backends/datacube.py index 161517b02..76910e296 100644 --- a/polytope_feature/datacube/backends/datacube.py +++ b/polytope_feature/datacube/backends/datacube.py @@ -128,6 +128,7 @@ def get_indices(self, path: DatacubePath, axis, lower, upper, method=None): e.g. returns integer discrete points between two floats """ path = self.fit_path(path) + print(path) indexes = axis.find_indexes(path, self) idx_between = axis.find_indices_between(indexes, lower, upper, self, method) diff --git a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index b1d3721e8..21aaf521c 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -77,7 +77,7 @@ def datacube_natural_indexes(self, qube_node): else: return [] - def get_indices(self, path_node, axis, lower, upper, method=None): + def get_indices(self, path, path_node, axis, lower, upper, method=None): """ Given a path to a subset of the datacube, return the discrete indexes which exist between two non-discrete values (lower, upper) for a particular axis (given by label) @@ -85,7 +85,7 @@ def get_indices(self, path_node, axis, lower, upper, method=None): e.g. 
returns integer discrete points between two floats """ # path = self.fit_path(path) - indexes = axis.find_indexes_node(path_node, self) + indexes = axis.find_indexes_node(path_node, self, path) idx_between = axis.find_indices_between(indexes, lower, upper, self, method) diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index 87ac79b21..b5b4869f5 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -81,17 +81,17 @@ def find_indexes(self, path, datacube): def find_standard_indexes_node(self, path_node, datacube): return datacube.datacube_natural_indexes(path_node) - def find_indexes_node(self, path_node, datacube): + def find_indexes_node(self, path_node, datacube, path): indexes = self.find_standard_indexes_node(path_node, datacube) # path = {self.name: tuple(path_node.values)} - if path_node: - path = {path_node.key: tuple(path_node.values)} - else: - path = {self.name: tuple()} + if not path: + if path_node: + path = {path_node.key: tuple(path_node.values)} + else: + path = {self.name: tuple()} for transformation in self.transformations[::-1]: - print(indexes) indexes = transformation.find_modified_indexes(indexes, path, datacube, self) - print(indexes) + # print(indexes) return indexes def offset(self, value): diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py index ccc167066..57df7dfbc 100644 --- a/polytope_feature/engine/qubed_slicer.py +++ b/polytope_feature/engine/qubed_slicer.py @@ -13,18 +13,20 @@ from typing import List from ..datacube.backends.datacube import Datacube +import math class QubedSlicer(Engine): def __init__(self): self.ax_is_unsliceable = {} self.compressed_axes = [] + self.remapped_vals = {} def find_datacube_vals(): # TODO pass - def find_values_between(self, polytope, ax, node, datacube, lower, upper): + def find_values_between(self, polytope, ax, node, datacube, lower, upper, path=None): if 
isinstance(ax, UnsliceableDatacubeAxis): return [v for v in node.values if lower <= v <= upper] @@ -35,9 +37,21 @@ def find_values_between(self, polytope, ax, node, datacube, lower, upper): method = polytope.method # values = datacube.get_indices(flattened, ax, lower, upper, method) - values = datacube.get_indices(node, ax, lower, upper, method) + values = datacube.get_indices(path, node, ax, lower, upper, method) return values + def remap_values(self, ax, value): + remapped_val = self.remapped_vals.get((value, ax.name), None) + if remapped_val is None: + remapped_val = value + if ax.is_cyclic: + remapped_val_interm = ax.remap([value, value])[0] + remapped_val = (remapped_val_interm[0] + remapped_val_interm[1]) / 2 + if ax.can_round: + remapped_val = round(remapped_val, int(-math.log10(ax.tol))) + self.remapped_vals[(value, ax.name)] = remapped_val + return remapped_val + def _actual_slice(self, q: Qube, polytopes_to_slice, datacube, datacube_transformations) -> 'Qube': def find_polytopes_on_axis(axis_name, polytopes): @@ -73,15 +87,17 @@ def transform_upper_lower(axis_name, lower, upper, datacube): return (lower, upper) - def _slice_second_grid_axis(axis_name, polytopes, datacube, datacube_transformations, second_axis_vals) -> list[Qube]: + def _slice_second_grid_axis(axis_name, polytopes, datacube, datacube_transformations, second_axis_vals, path) -> list[Qube]: result = [] polytopes_on_axis = find_polytopes_on_axis(axis_name, polytopes) for poly in polytopes_on_axis: + ax = datacube._axes[axis_name] lower, upper, slice_axis_idx = poly.extents(axis_name) - new_lower, new_upper = transform_upper_lower(axis_name, lower, upper, datacube) - found_vals = [v for v in second_axis_vals if new_lower <= v <= new_upper] + # new_lower, new_upper = transform_upper_lower(axis_name, lower, upper, datacube) + # found_vals = [v for v in second_axis_vals if new_lower <= v <= new_upper] + found_vals = self.find_values_between(poly, ax, None, datacube, lower, upper, path) if 
len(found_vals) == 0: continue @@ -89,7 +105,7 @@ def _slice_second_grid_axis(axis_name, polytopes, datacube, datacube_transformat # slice polytope along each value on child and keep resulting polytopes in memory sliced_polys = [] for val in found_vals: - ax = datacube._axes[axis_name] + # ax = datacube._axes[axis_name] if not isinstance(ax, UnsliceableDatacubeAxis): fval = ax.to_float(val) # slice polytope along the value and add sliced polytope to list of polytopes in memory @@ -102,6 +118,7 @@ def _slice_second_grid_axis(axis_name, polytopes, datacube, datacube_transformat new_found_vals = [] for found_val in found_vals: + found_val = self.remap_values(ax, found_val) if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): new_found_vals.append(str(found_val)) else: @@ -164,15 +181,16 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] # if it's not compressed, need to separate into different nodes to append to the tree for i, found_val in enumerate(found_vals): + found_val = self.remap_values(ax, found_val) child_polytopes = [p for p in polytopes if p != poly] if sliced_polys[i]: child_polytopes.append(sliced_polys[i]) second_axis_vals = mapper_transformation.second_axis_vals([found_val]) - + flattened_path = {grid_axes[0]: (found_val,)} # get second axis children through slicing children = _slice_second_grid_axis( - grid_axes[1], child_polytopes, datacube, datacube_transformations, second_axis_vals) + grid_axes[1], child_polytopes, datacube, datacube_transformations, second_axis_vals, flattened_path) # If this node used to have children but now has none due to filtering, skip it. 
if not children: continue @@ -227,6 +245,7 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] # if it's not compressed, need to separate into different nodes to append to the tree if not axis_compressed and len(found_vals) > 1: for i, found_val in enumerate(found_vals): + found_val = self.remap_values(ax, found_val) child_polytopes = [p for p in polytopes if p != poly] if sliced_polys[i]: child_polytopes.append(sliced_polys[i]) @@ -258,6 +277,7 @@ def _slice(q: Qube, polytopes, datacube, datacube_transformations) -> list[Qube] new_found_vals = [] for found_val in found_vals: + found_val = self.remap_values(ax, found_val) if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp): new_found_vals.append(str(found_val)) else: diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index 91971ad78..6b8c11765 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -154,8 +154,8 @@ def find_relevant_subcube_from_request(request, qube_url): ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) - # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) + # ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) + ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) ) qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) From e30f17f6543e0f6cd9f2d13fb33644ba61df56b5 Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 29 Apr 2025 15:03:52 +0200 Subject: [PATCH 27/28] remove unnecessary md5 hash assignment --- polytope_feature/datacube/backends/qubed.py | 2 +- tests/test_qubed_extraction_engine.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/polytope_feature/datacube/backends/qubed.py b/polytope_feature/datacube/backends/qubed.py index 21aaf521c..6d40fe266 100644 --- a/polytope_feature/datacube/backends/qubed.py +++ b/polytope_feature/datacube/backends/qubed.py @@ -122,7 +122,7 @@ def get(self, requests, context=None): for i, key in enumerate(compressed_request[0].keys()): uncompressed_request[key] = combi[i] # TODO: get the hash from somewhere... - self.grid_md5_hash = "cbda19e48d4d7e5e22641154878b9b22" + # self.grid_md5_hash = "cbda19e48d4d7e5e22641154878b9b22" complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash) complete_list_complete_uncompressed_requests.append(complete_uncompressed_request) complete_fdb_decoding_info.append(fdb_requests_decoding_info[j]) diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index 6b8c11765..91971ad78 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -154,8 +154,8 @@ def find_relevant_subcube_from_request(request, qube_url): ConvexPolytope(["dataset"], [["climate-dt"]]), ConvexPolytope(["class"], [["d1"]]), ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]), - # ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) - ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) + ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) + # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]]) ) qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations) From edf527e5fe5f89b26ad234235522ce2ec79440f4 Mon Sep 17 00:00:00 2001 From: mathleur Date: Tue, 24 Jun 2025 10:21:08 +0200 Subject: [PATCH 28/28] update gitignore --- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 69063359a..a7733b802 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,10 @@ venv_python3_11 *.txt 
tests/data venv_gj_iterator -new_venv_gj_iterator \ No newline at end of file +new_venv_gj_iterator +**/build +*.so +*.lock +**/_version.py +rust_deployment_venv +**/target \ No newline at end of file