From f1639ab13cb68a8d9fad7735f2855fe198e22bcc Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 6 Apr 2026 20:56:11 -0400 Subject: [PATCH 1/3] Migration to peprs --- pepdbagent/const.py | 1 - pepdbagent/models.py | 4 +- pepdbagent/modules/project.py | 73 +++++---- pepdbagent/modules/sample.py | 33 ++--- pepdbagent/modules/view.py | 27 ++-- pepdbagent/utils.py | 2 +- requirements/requirements-all.txt | 2 +- .../amendments1/project_config.yaml | 1 - .../namespace2/derive/project_config.yaml | 2 +- .../namespace3/piface/project_config.yaml | 2 +- .../namespace3/remove/project_config.yaml | 1 - .../amendments1/project_config.yaml | 1 - .../private_test/derive/project_config.yaml | 1 - .../private_test/remove/project_config.yaml | 1 - tests/test_project.py | 58 +++++--- tests/test_project_history.py | 62 ++++---- tests/test_samples.py | 12 +- tests/test_updates.py | 140 +++++++++--------- tests/utils.py | 8 +- 19 files changed, 218 insertions(+), 213 deletions(-) diff --git a/pepdbagent/const.py b/pepdbagent/const.py index cf0577b..2bf7c93 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -6,7 +6,6 @@ DESCRIPTION_KEY = "description" NAME_KEY = "name" -# from peppy.const import SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_LIST_KEY DEFAULT_OFFSET = 0 DEFAULT_LIMIT = 100 diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 95aa783..6291dc7 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -2,7 +2,7 @@ import datetime from typing import Dict, List, Optional, Union -from peppy.const import CONFIG_KEY, SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_LIST_KEY +from peprs.const import CONFIG_KEY, SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_DICT_KEY from pydantic import BaseModel, ConfigDict, Field, field_validator from pepdbagent.const import DEFAULT_TAG @@ -14,7 +14,7 @@ class ProjectDict(BaseModel): """ config: dict = Field(alias=CONFIG_KEY) - subsample_list: Optional[Union[list, None]] = Field(alias=SUBSAMPLE_RAW_LIST_KEY) + subsample_list: Optional[Union[list, None]] = Field(alias=SUBSAMPLE_RAW_DICT_KEY) sample_dict: list = Field(alias=SAMPLE_RAW_DICT_KEY) model_config = ConfigDict(populate_by_name=True, extra="forbid") diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index bf87e78..6259041 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -4,14 +4,15 @@ from typing import Dict, List, NoReturn, Union import numpy as np -import peppy -from peppy.const import ( +import peprs +from peprs.const import ( CONFIG_KEY, - SAMPLE_NAME_ATTR, SAMPLE_RAW_DICT_KEY, - SAMPLE_TABLE_INDEX_KEY, - SUBSAMPLE_RAW_LIST_KEY, + SUBSAMPLE_RAW_DICT_KEY, ) + +SAMPLE_NAME_ATTR = "sample_name" +SAMPLE_TABLE_INDEX_KEY = "sample_table_index" from sqlalchemy import Select, and_, delete, select from sqlalchemy.exc import IntegrityError, NoResultFound from sqlalchemy.orm import Session @@ -86,7 +87,7 @@ def get( tag: str = DEFAULT_TAG, raw: bool = True, with_id: bool = False, - ) -> Union[peppy.Project, dict, None]: + ) -> Union[peprs.Project, dict, None]: """ Retrieve project from database by specifying namespace, name and tag @@ -95,7 +96,7 @@ def get( :param tag: tag (or version) of the project. :param raw: retrieve unprocessed (raw) PEP dict. :param with_id: retrieve project with id [default: False] - :return: peppy.Project object with found project or dict with unprocessed + :return: peprs.Project object with found project or dict with unprocessed PEP elements: { name: str description: str @@ -133,13 +134,13 @@ def get( project_value = { CONFIG_KEY: found_prj.config, SAMPLE_RAW_DICT_KEY: sample_list, - SUBSAMPLE_RAW_LIST_KEY: subsample_list, + SUBSAMPLE_RAW_DICT_KEY: subsample_list, } if raw: return project_value else: - project_obj = peppy.Project().from_dict(project_value) + project_obj = peprs.Project.from_dict(project_value) return project_obj else: @@ -224,13 +225,13 @@ def get_by_rp( self, registry_path: str, raw: bool = False, - ) -> Union[peppy.Project, dict, None]: + ) -> Union[peprs.Project, dict, None]: """ Retrieve project from database by specifying project registry_path :param registry_path: project registry_path [e.g. namespace/name:tag] :param raw: retrieve unprocessed (raw) PEP dict. - :return: peppy.Project object with found project or dict with unprocessed + :return: peprs.Project object with found project or dict with unprocessed PEP elements: { name: str description: str @@ -296,7 +297,7 @@ def delete_by_rp( def create( self, - project: Union[peppy.Project, dict], + project: Union[peprs.Project, dict], namespace: str, name: str = None, tag: str = DEFAULT_TAG, @@ -312,14 +313,8 @@ def create( Project with the key, that already exists won't be uploaded(but case, when argument update is set True) - :param peppy.Project project: Project object that has to be uploaded to the DB - danger zone: - optionally, project can be a dictionary with PEP elements - ({ - _config: dict, - _sample_dict: Union[list, dict], - _subsample_list: list - }) + :param project: peprs.Project object or dict with PEP elements + ({config: dict, samples: list, subsamples: list}) :param namespace: namespace of the project (Default: 'other') :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project. @@ -332,8 +327,8 @@ def create( :param description: description of the project :return: None """ - if isinstance(project, peppy.Project): - proj_dict = project.to_dict(extended=True, orient="records") + if isinstance(project, peprs.Project): + proj_dict = project.to_dict(raw=True, by_sample=True) elif isinstance(project, dict): # verify if the dictionary has all necessary elements. # samples should be always presented as list of dicts (orient="records")) @@ -343,11 +338,14 @@ def create( proj_dict = ProjectDict(**project).model_dump(by_alias=True) else: raise PEPDatabaseAgentError( - "Project has to be peppy.Project object or dictionary with PEP elements" + "Project has to be peprs.Project object or dictionary with PEP elements" ) if not description: - description = project.get(description, "") + if isinstance(project, peprs.Project): + description = project.description or "" + else: + description = proj_dict.get(CONFIG_KEY, {}).get(DESCRIPTION_KEY, "") proj_dict[CONFIG_KEY][DESCRIPTION_KEY] = description namespace = namespace.lower() @@ -442,8 +440,8 @@ def create( ), ) - if proj_dict[SUBSAMPLE_RAW_LIST_KEY]: - subsamples = proj_dict[SUBSAMPLE_RAW_LIST_KEY] + if proj_dict.get(SUBSAMPLE_RAW_DICT_KEY): + subsamples = proj_dict[SUBSAMPLE_RAW_DICT_KEY] self._add_subsamples_to_project(new_prj, subsamples) with Session(self._sa_engine) as session: @@ -553,9 +551,9 @@ def _overwrite( sample_table_index=project_dict[CONFIG_KEY].get(SAMPLE_TABLE_INDEX_KEY), ) - if project_dict[SUBSAMPLE_RAW_LIST_KEY]: + if project_dict.get(SUBSAMPLE_RAW_DICT_KEY): self._add_subsamples_to_project( - found_prj, project_dict[SUBSAMPLE_RAW_LIST_KEY] + found_prj, project_dict[SUBSAMPLE_RAW_DICT_KEY] ) session.commit() @@ -579,7 +577,7 @@ def update( :param update_dict: dict with update key->values. Dict structure: { - project: Optional[peppy.Project] + project: Optional[peprs.Project] is_private: Optional[bool] tag: Optional[str] name: Optional[str] @@ -603,11 +601,11 @@ def update( else: if "project" in update_dict: project_dict = update_dict.pop("project").to_dict( - extended=True, orient="records" + raw=True, by_sample=True ) update_dict["config"] = project_dict[CONFIG_KEY] update_dict["samples"] = project_dict[SAMPLE_RAW_DICT_KEY] - update_dict["subsamples"] = project_dict[SUBSAMPLE_RAW_LIST_KEY] + update_dict["subsamples"] = project_dict.get(SUBSAMPLE_RAW_DICT_KEY, []) update_values = UpdateItems(**update_dict) @@ -1174,7 +1172,8 @@ def get_samples( ).get(SAMPLE_RAW_DICT_KEY) return ( self.get(namespace=namespace, name=name, tag=tag, raw=False, with_id=with_ids) - .sample_table.replace({np.nan: None}) + .to_pandas() + .replace({np.nan: None}) .to_dict(orient="records") ) @@ -1233,7 +1232,7 @@ def get_project_from_history( history_id: int, raw: bool = True, with_id: bool = False, - ) -> Union[dict, peppy.Project]: + ) -> Union[dict, peprs.Project]: """ Get project sample history annotation by providing namespace, name, and tag @@ -1319,13 +1318,13 @@ def get_project_from_history( return { CONFIG_KEY: project_config or project_mapping.config, SAMPLE_RAW_DICT_KEY: ordered_samples_list, - SUBSAMPLE_RAW_LIST_KEY: self.get_subsamples(namespace, name, tag), + SUBSAMPLE_RAW_DICT_KEY: self.get_subsamples(namespace, name, tag), } - return peppy.Project.from_dict( + return peprs.Project.from_dict( pep_dictionary={ CONFIG_KEY: project_config or project_mapping.config, SAMPLE_RAW_DICT_KEY: ordered_samples_list, - SUBSAMPLE_RAW_LIST_KEY: self.get_subsamples(namespace, name, tag), + SUBSAMPLE_RAW_DICT_KEY: self.get_subsamples(namespace, name, tag), } ) @@ -1440,7 +1439,7 @@ def restore( with_id=True, ) self.update( - update_dict={"project": peppy.Project.from_dict(restore_project)}, + update_dict={"project": peprs.Project.from_dict(restore_project)}, namespace=namespace, name=name, tag=tag, diff --git a/pepdbagent/modules/sample.py b/pepdbagent/modules/sample.py index 90e216b..c9d12bf 100644 --- a/pepdbagent/modules/sample.py +++ b/pepdbagent/modules/sample.py @@ -2,8 +2,7 @@ import logging from typing import Union -import peppy -from peppy.const import SAMPLE_TABLE_INDEX_KEY +import peprs from sqlalchemy import and_, select from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified @@ -37,7 +36,7 @@ def get( sample_name: str, tag: str = DEFAULT_TAG, raw: bool = True, - ) -> Union[peppy.Sample, dict, None]: + ) -> Union[peprs.Sample, dict, None]: """ Retrieve sample from the database using namespace, name, tag, and sample_name @@ -45,15 +44,8 @@ def get( :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project. :param sample_name: sample_name of the sample - :param raw: return raw dict or peppy.Sample object [Default: True] - :return: peppy.Project object with found project or dict with unprocessed - PEP elements: { - name: str - description: str - _config: dict - _sample_dict: dict - _subsample_dict: dict - } + :param raw: return raw dict or peprs.Sample object [Default: True] + :return: peprs.Sample object or raw dict """ statement_sample = select(Samples).where( and_( @@ -83,13 +75,10 @@ def get( if result: if not raw: config = session.execute(project_config_statement).one_or_none()[0] - project = peppy.Project().from_dict( + project = peprs.Project.from_dict( pep_dictionary={ - "name": name, - "description": config.get("description"), - "_config": config, - "_sample_dict": [result.sample], - "_subsample_dict": None, + "config": config, + "samples": [result.sample], } ) return project.samples[0] @@ -155,11 +144,11 @@ def update( sample_mapping.sample.update(update_dict) try: sample_mapping.sample_name = sample_mapping.sample[ - project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, "sample_name") + project_mapping.config.get("sample_table_index", "sample_name") ] except KeyError: raise KeyError( - f"Sample index key {project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, 'sample_name')} not found in sample dict" + f"Sample index key {project_mapping.config.get('sample_table_index', 'sample_name')} not found in sample dict" ) # This line needed due to: https://github.com/sqlalchemy/sqlalchemy/issues/5218 @@ -206,11 +195,11 @@ def add( project_mapping = session.scalar(project_statement) try: sample_name = sample_dict[ - project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, "sample_name") + project_mapping.config.get("sample_table_index", "sample_name") ] except KeyError: raise KeyError( - f"Sample index key {project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, 'sample_name')} not found in sample dict" + f"Sample index key {project_mapping.config.get('sample_table_index', 'sample_name')} not found in sample dict" ) statement = select(Samples).where( and_(Samples.project_id == project_mapping.id, Samples.sample_name == sample_name) diff --git a/pepdbagent/modules/view.py b/pepdbagent/modules/view.py index 8704c97..5dd79ea 100644 --- a/pepdbagent/modules/view.py +++ b/pepdbagent/modules/view.py @@ -3,7 +3,7 @@ import logging from typing import List, Union -import peppy +import peprs from sqlalchemy import and_, delete, select from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session @@ -44,7 +44,7 @@ def get( tag: str = DEFAULT_TAG, view_name: str = None, raw: bool = True, - ) -> Union[peppy.Project, dict, None]: + ) -> Union[peprs.Project, dict, None]: """ Retrieve view of the project from the database. View is a subset of the samples in the project. e.g. bed-db project has all the samples in bedbase, @@ -55,14 +55,7 @@ def get( :param tag: tag of the project (Default: tag is taken from the project object) :param view_name: name of the view :param raw: retrieve unprocessed (raw) PEP dict. [Default: True] - :return: peppy.Project object with found project or dict with unprocessed - PEP elements: { - name: str - description: str - _config: dict - _sample_dict: dict - _subsample_dict: dict - } + :return: peprs.Project object or raw dict """ _LOGGER.debug(f"Get view {view_name} from {namespace}/{name}:{tag}") view_statement = select(Views).where( @@ -80,11 +73,11 @@ def get( ) samples = [sample.sample.sample for sample in view.samples] config = view.project_mapping.config - sub_project_dict = {"_config": config, "_sample_dict": samples, "_subsample_dict": None} + sub_project_dict = {"config": config, "samples": samples} if raw: return sub_project_dict else: - return peppy.Project.from_dict(sub_project_dict) + return peprs.Project.from_dict(sub_project_dict) def get_annotation( self, namespace: str, name: str, tag: str = DEFAULT_TAG, view_name: str = None @@ -349,7 +342,7 @@ def remove_sample( def get_snap_view( self, namespace: str, name: str, tag: str, sample_name_list: List[str], raw: bool = False - ) -> Union[peppy.Project, dict]: + ) -> Union[peprs.Project, dict]: """ Get a snap view of the project. Snap view is a view of the project with only the samples in the list. This view won't be saved in the database. @@ -359,7 +352,7 @@ def get_snap_view( :param tag: tag of the project :param sample_name_list: list of sample names e.g. ["sample1", "sample2"] :param raw: retrieve unprocessed (raw) PEP dict. - :return: peppy.Project object + :return: peprs.Project object """ _LOGGER.debug(f"Creating snap view for {namespace}/{name}:{tag}") project_statement = select(Projects).where( @@ -390,10 +383,10 @@ def get_snap_view( config = project.config if raw: - return {"_config": config, "_sample_dict": samples, "_subsample_dict": None} + return {"config": config, "samples": samples} else: - return peppy.Project.from_dict( - {"_config": config, "_sample_dict": samples, "_subsample_dict": None} + return peprs.Project.from_dict( + {"config": config, "samples": samples} ) def get_views_annotation( diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index fc96684..a15809a 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -6,7 +6,7 @@ from typing import List, Tuple, Union import ubiquerg -from peppy.const import SAMPLE_RAW_DICT_KEY +from peprs.const import SAMPLE_RAW_DICT_KEY from pepdbagent.exceptions import RegistryPathError diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 786a29d..e6a85bc 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,6 @@ sqlalchemy>=2.0.0 logmuse>=0.2.7 -peppy>=0.40.6 +peprs>=0.1.2 ubiquerg>=0.6.2 coloredlogs>=15.0.1 pytest-mock diff --git a/tests/data/namespace1/amendments1/project_config.yaml b/tests/data/namespace1/amendments1/project_config.yaml index 79da9ab..86c8fd8 100644 --- a/tests/data/namespace1/amendments1/project_config.yaml +++ b/tests/data/namespace1/amendments1/project_config.yaml @@ -7,7 +7,6 @@ sample_modifiers: attributes: [file_path] sources: source1: /data/lab/project/{organism}_{time}h.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq project_modifiers: amend: newLib: diff --git a/tests/data/namespace2/derive/project_config.yaml b/tests/data/namespace2/derive/project_config.yaml index 445929d..2e23feb 100644 --- a/tests/data/namespace2/derive/project_config.yaml +++ b/tests/data/namespace2/derive/project_config.yaml @@ -7,4 +7,4 @@ sample_modifiers: attributes: [file_path] sources: source1: $HOME/data/lab/project/{organism}_{time}h.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq + diff --git a/tests/data/namespace3/piface/project_config.yaml b/tests/data/namespace3/piface/project_config.yaml index f808189..7326eaa 100644 --- a/tests/data/namespace3/piface/project_config.yaml +++ b/tests/data/namespace3/piface/project_config.yaml @@ -11,7 +11,7 @@ looper: sample_modifiers: append: attr: "val" - pipeline_interfaces: ["pipeline_interface1_sample.yaml", "pipeline_interface2_sample.yaml"] + pipeline_interfaces: "pipeline_interface1_sample.yaml" derive: attributes: [read1, read2] sources: diff --git a/tests/data/namespace3/remove/project_config.yaml b/tests/data/namespace3/remove/project_config.yaml index 7821eba..8a789eb 100644 --- a/tests/data/namespace3/remove/project_config.yaml +++ b/tests/data/namespace3/remove/project_config.yaml @@ -7,6 +7,5 @@ sample_modifiers: attributes: [file_path] sources: source1: /data/lab/project/{organism}_{time}h.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq remove: - protocol diff --git a/tests/data/private_test/amendments1/project_config.yaml b/tests/data/private_test/amendments1/project_config.yaml index 79da9ab..86c8fd8 100644 --- a/tests/data/private_test/amendments1/project_config.yaml +++ b/tests/data/private_test/amendments1/project_config.yaml @@ -7,7 +7,6 @@ sample_modifiers: attributes: [file_path] sources: source1: /data/lab/project/{organism}_{time}h.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq project_modifiers: amend: newLib: diff --git a/tests/data/private_test/derive/project_config.yaml b/tests/data/private_test/derive/project_config.yaml index 445929d..6f24c70 100644 --- a/tests/data/private_test/derive/project_config.yaml +++ b/tests/data/private_test/derive/project_config.yaml @@ -7,4 +7,3 @@ sample_modifiers: attributes: [file_path] sources: source1: $HOME/data/lab/project/{organism}_{time}h.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq diff --git a/tests/data/private_test/remove/project_config.yaml b/tests/data/private_test/remove/project_config.yaml index 7821eba..8a789eb 100644 --- a/tests/data/private_test/remove/project_config.yaml +++ b/tests/data/private_test/remove/project_config.yaml @@ -7,6 +7,5 @@ sample_modifiers: attributes: [file_path] sources: source1: /data/lab/project/{organism}_{time}h.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq remove: - protocol diff --git a/tests/test_project.py b/tests/test_project.py index 426f026..c79198a 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1,5 +1,5 @@ import numpy as np -import peppy +import peprs import pytest from pepdbagent.exceptions import ProjectNotFoundError @@ -18,15 +18,15 @@ class TestProject: def test_create_project(self): with PEPDBAgentContextManager(add_data=False) as agent: - prj = peppy.Project(list_of_available_peps()["namespace3"]["subtables"]) + prj = peprs.Project(list_of_available_peps()["namespace3"]["subtables"]) agent.project.create(prj, namespace="test", name="imply", overwrite=False) assert True def test_create_project_from_dict(self): with PEPDBAgentContextManager(add_data=False) as agent: - prj = peppy.Project(list_of_available_peps()["namespace3"]["subtables"]) + prj = peprs.Project(list_of_available_peps()["namespace3"]["subtables"]) agent.project.create( - prj.to_dict(extended=True, orient="records"), + prj.to_dict(raw=True, by_sample=True), namespace="test", name="imply", overwrite=True, @@ -47,9 +47,16 @@ def test_create_project_from_dict(self): ) def test_get_project(self, namespace, name): with PEPDBAgentContextManager(add_data=True) as agent: - kk = agent.project.get(namespace=namespace, name=name, tag="default", raw=False) - ff = peppy.Project(get_path_to_example_file(namespace, name)) - assert kk == ff + kk = agent.project.get(namespace=namespace, name=name, tag="default", raw=True) + ff = peprs.Project(get_path_to_example_file(namespace, name)).to_dict( + raw=True, by_sample=True + ) + # pepdbagent always sets the registry name on the stored config, + # overriding any name in the file (which may be empty or different). + ff["config"]["name"] = name + assert kk["config"] == ff["config"] + assert kk["samples"] == ff["samples"] + assert kk.get("subsamples", []) == ff.get("subsamples", []) @pytest.mark.parametrize( "namespace, name", @@ -65,10 +72,11 @@ def test_get_config(self, namespace, name): name=name, tag="default", ) - ff = peppy.Project(get_path_to_example_file(namespace, name)) - ff["_original_config"]["description"] = description - ff["_original_config"]["name"] = name - assert kk == ff["_original_config"] + ff = peprs.Project(get_path_to_example_file(namespace, name)) + expected_config = ff.config.copy() + expected_config["description"] = description + expected_config["name"] = name + assert kk == expected_config @pytest.mark.parametrize( "namespace, name", @@ -83,11 +91,11 @@ def test_get_subsamples(self, namespace, name): name=name, tag="default", ) - orgiginal_prj = peppy.Project(get_path_to_example_file(namespace, name)) + orgiginal_prj = peprs.Project(get_path_to_example_file(namespace, name)) assert ( prj_subtables - == orgiginal_prj.to_dict(extended=True, orient="records")["_subsample_list"] + == orgiginal_prj.to_dict(raw=True, by_sample=True)["subsamples"] ) @pytest.mark.parametrize( @@ -101,11 +109,11 @@ def test_get_samples_raw(self, namespace, name): prj_samples = agent.project.get_samples( namespace=namespace, name=name, tag="default", raw=True ) - orgiginal_prj = peppy.Project(get_path_to_example_file(namespace, name)) + orgiginal_prj = peprs.Project(get_path_to_example_file(namespace, name)) assert ( prj_samples - == orgiginal_prj.to_dict(extended=True, orient="records")["_sample_dict"] + == orgiginal_prj.to_dict(raw=True, by_sample=True)["samples"] ) @pytest.mark.parametrize( @@ -122,12 +130,24 @@ def test_get_samples_processed(self, namespace, name): tag="default", raw=False, ) - orgiginal_prj = peppy.Project(get_path_to_example_file(namespace, name)) - - assert prj_samples == orgiginal_prj.sample_table.replace({np.nan: None}).to_dict( - orient="records" + orgiginal_prj = peprs.Project(get_path_to_example_file(namespace, name)) + expected = ( + orgiginal_prj.to_pandas().replace({np.nan: None}).to_dict(orient="records") ) + # Normalize numpy arrays (used for subsample list columns) to plain lists + # so dict equality works without raising "truth value is ambiguous". + def _normalize(samples): + return [ + { + k: (v.tolist() if isinstance(v, np.ndarray) else v) + for k, v in s.items() + } + for s in samples + ] + + assert _normalize(prj_samples) == _normalize(expected) + @pytest.mark.parametrize( "namespace, name,tag", [ diff --git a/tests/test_project_history.py b/tests/test_project_history.py index 332e1ca..20d00ae 100644 --- a/tests/test_project_history.py +++ b/tests/test_project_history.py @@ -1,4 +1,4 @@ -import peppy +import peprs import pytest from pepdbagent.const import PEPHUB_SAMPLE_ID_KEY @@ -26,10 +26,10 @@ def test_get_add_history_all_annotation(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: prj = agent.project.get(namespace, name, tag="default", with_id=True) - prj["_sample_dict"][0]["sample_name"] = "new_sample_name" + prj["samples"][0]["sample_name"] = "new_sample_name" - del prj["_sample_dict"][1] - del prj["_sample_dict"][2] + del prj["samples"][1] + del prj["samples"][2] new_sample1 = { "sample_name": "new_sample", "protocol": "new_protocol", @@ -41,14 +41,14 @@ def test_get_add_history_all_annotation(self, namespace, name, sample_name): PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) - prj["_sample_dict"].append(new_sample2.copy()) + prj["samples"].append(new_sample1.copy()) + prj["samples"].append(new_sample2.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) project_history = agent.project.get_history(namespace, name, tag="default") @@ -66,10 +66,10 @@ def test_get_add_history_all_project(self, namespace, name, sample_name): prj_init = agent.project.get(namespace, name, tag="default", raw=False) prj = agent.project.get(namespace, name, tag="default", with_id=True) - # prj["_sample_dict"][0]["sample_name"] = "new_sample_name" + # prj["samples"][0]["sample_name"] = "new_sample_name" - del prj["_sample_dict"][1] - del prj["_sample_dict"][2] + del prj["samples"][1] + del prj["samples"][2] new_sample1 = { "sample_name": "new_sample", "protocol": "new_protocol", @@ -81,14 +81,14 @@ def test_get_add_history_all_project(self, namespace, name, sample_name): PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) - prj["_sample_dict"].append(new_sample2.copy()) + prj["samples"].append(new_sample1.copy()) + prj["samples"].append(new_sample2.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) history_prj = agent.project.get_project_from_history( @@ -106,13 +106,13 @@ def test_get_history_multiple_changes(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: prj = agent.project.get(namespace, name, tag="default", with_id=True) - del prj["_sample_dict"][1] + del prj["samples"][1] agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) prj = agent.project.get(namespace, name, tag="default", with_id=True) @@ -122,13 +122,13 @@ def test_get_history_multiple_changes(self, namespace, name, sample_name): "protocol": "new_protocol", PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) + prj["samples"].append(new_sample1.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) history = agent.project.get_history(namespace, name, tag="default") @@ -144,13 +144,13 @@ def test_get_history_multiple_changes(self, namespace, name, sample_name): def test_get_project_incorrect_history_id(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: prj = agent.project.get(namespace, name, tag="default", with_id=True) - del prj["_sample_dict"][1] + del prj["samples"][1] agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) with pytest.raises(HistoryNotFoundError): @@ -179,13 +179,13 @@ def test_delete_all_history(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: prj = agent.project.get(namespace, name, tag="default", with_id=True) - del prj["_sample_dict"][1] + del prj["samples"][1] agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) prj = agent.project.get(namespace, name, tag="default", with_id=True) @@ -195,13 +195,13 @@ def test_delete_all_history(self, namespace, name, sample_name): "protocol": "new_protocol", PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) + prj["samples"].append(new_sample1.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) history = agent.project.get_history(namespace, name, tag="default") @@ -226,13 +226,13 @@ def test_delete_one_history(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: prj = agent.project.get(namespace, name, tag="default", with_id=True) - del prj["_sample_dict"][1] + del prj["samples"][1] agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) prj = agent.project.get(namespace, name, tag="default", with_id=True) @@ -242,13 +242,13 @@ def test_delete_one_history(self, namespace, name, sample_name): "protocol": "new_protocol", PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) + prj["samples"].append(new_sample1.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) history = agent.project.get_history(namespace, name, tag="default") @@ -273,13 +273,13 @@ def test_restore_project(self, namespace, name, sample_name): prj_org = agent.project.get(namespace, name, tag="default", with_id=False) prj = agent.project.get(namespace, name, tag="default", with_id=True) - del prj["_sample_dict"][1] + del prj["samples"][1] agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) prj = agent.project.get(namespace, name, tag="default", with_id=True) @@ -289,13 +289,13 @@ def test_restore_project(self, namespace, name, sample_name): "protocol": "new_protocol", PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) + prj["samples"].append(new_sample1.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) agent.project.restore(namespace, name, tag="default", history_id=1) diff --git a/tests/test_samples.py b/tests/test_samples.py index e8a6862..15acc73 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1,4 +1,4 @@ -import peppy +import peprs import pytest from pepdbagent.exceptions import SampleNotFoundError @@ -20,7 +20,7 @@ class TestSamples: def test_retrieve_one_sample(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: one_sample = agent.sample.get(namespace, name, sample_name, raw=False) - assert isinstance(one_sample, peppy.Sample) + assert isinstance(one_sample, peprs.Sample) assert one_sample.sample_name == sample_name @pytest.mark.parametrize( @@ -44,7 +44,7 @@ def test_retrieve_raw_sample(self, namespace, name, sample_name): def test_retrieve_sample_with_modified_sample_id(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: one_sample = agent.sample.get(namespace, name, sample_name, raw=False) - assert isinstance(one_sample, peppy.Sample) + assert isinstance(one_sample, peprs.Sample) assert one_sample.sample_id == "frog_1" @pytest.mark.parametrize( @@ -154,7 +154,7 @@ def test_project_timestamp_was_changed(self, namespace, name, sample_name): def test_delete_sample(self, namespace, name, sample_name): with PEPDBAgentContextManager(add_data=True) as agent: one_sample = agent.sample.get(namespace, name, sample_name, raw=False) - assert isinstance(one_sample, peppy.Sample) + assert isinstance(one_sample, peprs.Sample) agent.sample.delete(namespace, name, tag="default", sample_name=sample_name) @@ -203,7 +203,9 @@ def test_add_sample(self, namespace, name, tag, sample_dict): ) def test_overwrite_sample(self, namespace, name, tag, sample_dict): with PEPDBAgentContextManager(add_data=True) as agent: - assert agent.project.get(namespace, name, raw=False).get_sample("pig_0h").time == "0" + # peprs/polars infers numeric values from the sample table, so the + # original time is loaded as int (not string). + assert agent.project.get(namespace, name, raw=False).get_sample("pig_0h").time == 0 agent.sample.add(namespace, name, tag, sample_dict, overwrite=True) assert ( diff --git a/tests/test_updates.py b/tests/test_updates.py index 628772e..3c2a15f 100644 --- a/tests/test_updates.py +++ b/tests/test_updates.py @@ -1,6 +1,5 @@ -import peppy +import peprs import pytest -from peppy.exceptions import IllegalStateException from pepdbagent.const import PEPHUB_SAMPLE_ID_KEY from pepdbagent.exceptions import ProjectDuplicatedSampleGUIDsError, SampleTableUpdateError @@ -248,16 +247,16 @@ def test_project_can_have_2_sample_names(self, namespace, name): """ with PEPDBAgentContextManager(add_data=True) as agent: new_prj = agent.project.get(namespace=namespace, name=name, raw=False, with_id=True) - prj_dict = new_prj.to_dict(extended=True, orient="records") + prj_dict = new_prj.to_dict(raw=True, by_sample=True) - prj_dict["_sample_dict"].append( + prj_dict["samples"].append( { "file": "data/frog23_data.txt", "protocol": "anySample3Type", "sample_name": "frog_2", } ) - prj_dict["_sample_dict"].append( + prj_dict["samples"].append( { "file": "data/frog23_data.txt4", "protocol": "anySample3Type4", @@ -270,12 +269,12 @@ def test_project_can_have_2_sample_names(self, namespace, name): namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj_dict)}, + update_dict={"project": peprs.Project.from_dict(prj_dict)}, ) prj = agent.project.get(namespace=namespace, name=name, raw=True) - assert len(prj["_sample_dict"]) == 4 + assert len(prj["samples"]) == 4 @pytest.mark.skipif( @@ -304,23 +303,23 @@ def test_update_whole_project_with_id(self, namespace, name): PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample.copy()) - prj["_sample_dict"][0]["sample_name"] = "new_sample_name2" - del prj["_sample_dict"][1] + prj["samples"].append(new_sample.copy()) + prj["samples"][0]["sample_name"] = "new_sample_name2" + del prj["samples"][1] agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) del new_sample[PEPHUB_SAMPLE_ID_KEY] - peppy_prj["_sample_dict"].append(new_sample.copy()) # add sample without id - peppy_prj["_sample_dict"][0]["sample_name"] = "new_sample_name2" # modify sample - del peppy_prj["_sample_dict"][1] # delete sample + peppy_prj["samples"].append(new_sample.copy()) # add sample without id + peppy_prj["samples"][0]["sample_name"] = "new_sample_name2" # modify sample + del peppy_prj["samples"][1] # delete sample - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -342,19 +341,19 @@ def test_insert_new_row(self, namespace, name): PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample.copy()) + prj["samples"].append(new_sample.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) del new_sample[PEPHUB_SAMPLE_ID_KEY] - peppy_prj["_sample_dict"].append(new_sample.copy()) # add sample without id + peppy_prj["samples"].append(new_sample.copy()) # add sample without id - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -381,22 +380,22 @@ def test_insert_new_multiple_rows(self, namespace, name): PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) - prj["_sample_dict"].append(new_sample2.copy()) + prj["samples"].append(new_sample1.copy()) + prj["samples"].append(new_sample2.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) del new_sample1[PEPHUB_SAMPLE_ID_KEY] del new_sample2[PEPHUB_SAMPLE_ID_KEY] - peppy_prj["_sample_dict"].append(new_sample1.copy()) # add sample without id - peppy_prj["_sample_dict"].append(new_sample2.copy()) # add sample without id + peppy_prj["samples"].append(new_sample1.copy()) # add sample without id + peppy_prj["samples"].append(new_sample2.copy()) # add sample without id - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -407,7 +406,11 @@ def test_insert_new_multiple_rows(self, namespace, name): ], ) def test_insert_new_multiple_rows_duplicated_samples(self, namespace, name): + """PEP 2.1.0 allows duplicate sample names, so this should succeed.""" with PEPDBAgentContextManager(add_data=True) as agent: + original_count = len( + agent.project.get(namespace=namespace, name=name, raw=True)["samples"] + ) prj = agent.project.get(namespace=namespace, name=name, raw=True, with_id=True) new_sample1 = { @@ -421,16 +424,17 @@ def test_insert_new_multiple_rows_duplicated_samples(self, namespace, name): PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].append(new_sample1.copy()) - prj["_sample_dict"].append(new_sample2.copy()) + prj["samples"].append(new_sample1.copy()) + prj["samples"].append(new_sample2.copy()) - with pytest.raises(IllegalStateException): - agent.project.update( - namespace=namespace, - name=name, - tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, - ) + agent.project.update( + namespace=namespace, + name=name, + tag="default", + update_dict={"project": peprs.Project.from_dict(prj)}, + ) + updated = agent.project.get(namespace=namespace, name=name, raw=True) + assert len(updated["samples"]) == original_count + 2 @pytest.mark.parametrize( "namespace, name", @@ -444,20 +448,20 @@ def test_delete_multiple_rows(self, namespace, name): peppy_prj = agent.project.get(namespace=namespace, name=name, raw=True) prj = agent.project.get(namespace=namespace, name=name, raw=True, with_id=True) - del prj["_sample_dict"][1] - del prj["_sample_dict"][2] + del prj["samples"][1] + del prj["samples"][2] agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) - del peppy_prj["_sample_dict"][1] # delete sample - del peppy_prj["_sample_dict"][2] # delete sample + del peppy_prj["samples"][1] # delete sample + del peppy_prj["samples"][2] # delete sample - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -473,18 +477,18 @@ def test_modify_one_row(self, namespace, name): peppy_prj = agent.project.get(namespace=namespace, name=name, raw=True) prj = agent.project.get(namespace=namespace, name=name, raw=True, with_id=True) - prj["_sample_dict"][0]["sample_name"] = "new_sample_name2" + prj["samples"][0]["sample_name"] = "new_sample_name2" agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) - peppy_prj["_sample_dict"][0]["sample_name"] = "new_sample_name2" # modify sample + peppy_prj["samples"][0]["sample_name"] = "new_sample_name2" # modify sample - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -500,20 +504,20 @@ def test_modify_multiple_rows(self, namespace, name): peppy_prj = agent.project.get(namespace=namespace, name=name, raw=True) prj = agent.project.get(namespace=namespace, name=name, raw=True, with_id=True) - prj["_sample_dict"][0]["sample_name"] = "new_sample_name2" - prj["_sample_dict"][1]["sample_name"] = "new_sample_name3" + prj["samples"][0]["sample_name"] = "new_sample_name2" + prj["samples"][1]["sample_name"] = "new_sample_name3" agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) - peppy_prj["_sample_dict"][0]["sample_name"] = "new_sample_name2" # modify sample - peppy_prj["_sample_dict"][1]["sample_name"] = "new_sample_name3" # modify sample + peppy_prj["samples"][0]["sample_name"] = "new_sample_name2" # modify sample + peppy_prj["samples"][1]["sample_name"] = "new_sample_name3" # modify sample - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -535,19 +539,19 @@ def test_add_new_first_sample(self, namespace, name): PEPHUB_SAMPLE_ID_KEY: None, } - prj["_sample_dict"].insert(0, new_sample.copy()) + prj["samples"].insert(0, new_sample.copy()) agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) del new_sample[PEPHUB_SAMPLE_ID_KEY] - peppy_prj["_sample_dict"].insert(0, new_sample.copy()) # add sample without id + peppy_prj["samples"].insert(0, new_sample.copy()) # add sample without id - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -563,26 +567,26 @@ def test_change_sample_order(self, namespace, name): peppy_prj = agent.project.get(namespace=namespace, name=name, raw=True) prj = agent.project.get(namespace=namespace, name=name, raw=True, with_id=True) - sample1 = prj["_sample_dict"][0].copy() - sample2 = prj["_sample_dict"][1].copy() + sample1 = prj["samples"][0].copy() + sample2 = prj["samples"][1].copy() - prj["_sample_dict"][0] = sample2 - prj["_sample_dict"][1] = sample1 + prj["samples"][0] = sample2 + prj["samples"][1] = sample1 agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) - peppy_prj["_sample_dict"][0] = sample2 - peppy_prj["_sample_dict"][1] = sample1 + peppy_prj["samples"][0] = sample2 + peppy_prj["samples"][1] = sample1 - del peppy_prj["_sample_dict"][0][PEPHUB_SAMPLE_ID_KEY] - del peppy_prj["_sample_dict"][1][PEPHUB_SAMPLE_ID_KEY] + del peppy_prj["samples"][0][PEPHUB_SAMPLE_ID_KEY] + del peppy_prj["samples"][1][PEPHUB_SAMPLE_ID_KEY] - assert peppy.Project.from_dict(peppy_prj) == agent.project.get( + assert peprs.Project.from_dict(peppy_prj) == agent.project.get( namespace=namespace, name=name, raw=False ) @@ -597,7 +601,7 @@ def test_update_porject_without_ids(self, namespace, name): with PEPDBAgentContextManager(add_data=True) as agent: prj = agent.project.get(namespace=namespace, name=name, raw=True, with_id=False) - prj["_sample_dict"][0]["sample_name"] = "new_sample_name2" + prj["samples"][0]["sample_name"] = "new_sample_name2" with pytest.raises(SampleTableUpdateError): @@ -605,7 +609,7 @@ def test_update_porject_without_ids(self, namespace, name): namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(prj)}, + update_dict={"project": peprs.Project.from_dict(prj)}, ) @pytest.mark.parametrize( @@ -617,12 +621,12 @@ def test_update_porject_without_ids(self, namespace, name): def test_update_project_with_duplicated_sample_guids(self, namespace, name): with PEPDBAgentContextManager(add_data=True) as agent: new_prj = agent.project.get(namespace=namespace, name=name, raw=True, with_id=True) - new_prj["_sample_dict"].append(new_prj["_sample_dict"][0]) + new_prj["samples"].append(new_prj["samples"][0]) with pytest.raises(ProjectDuplicatedSampleGUIDsError): agent.project.update( namespace=namespace, name=name, tag="default", - update_dict={"project": peppy.Project.from_dict(new_prj)}, + update_dict={"project": peprs.Project.from_dict(new_prj)}, ) diff --git a/tests/utils.py b/tests/utils.py index fc9cc89..56f878a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,7 +1,7 @@ import os import warnings -import peppy +import peprs import yaml from sqlalchemy.exc import OperationalError @@ -106,7 +106,11 @@ def _insert_data(self): else: private = False for name, path in item.items(): - prj = peppy.Project(path) + try: + prj = peprs.Project(path) + except Exception as e: + warnings.warn(f"Skipping {namespace}/{name}: {e}") + continue pepdb_con.project.create( namespace=namespace, name=name, From 6f4bae0d76d7fe4d1f51fca79fc7eef165b25daf Mon Sep 17 00:00:00 2001 From: Oleksandr <41573628+khoroshevskyi@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:05:58 -0400 Subject: [PATCH 2/3] Update tests/utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 56f878a..f440124 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -109,8 +109,9 @@ def _insert_data(self): try: prj = peprs.Project(path) except Exception as e: - warnings.warn(f"Skipping {namespace}/{name}: {e}") - continue + raise RuntimeError( + f"Failed to load test project {namespace}/{name} from {path}" + ) from e pepdb_con.project.create( namespace=namespace, name=name, From 8f4972dffb52340dd00b5338e5e3adaf82965d17 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 29 Jun 2026 14:16:39 -0400 Subject: [PATCH 3/3] PR fixes and lint --- docs/changelog.md | 5 +++++ pepdbagent/_version.py | 2 +- pepdbagent/const.py | 3 +++ pepdbagent/modules/project.py | 9 ++++----- pepdbagent/modules/sample.py | 6 +++--- pepdbagent/modules/view.py | 4 +--- requirements/requirements-all.txt | 2 +- tests/test_project.py | 19 ++++--------------- 8 files changed, 22 insertions(+), 28 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 96b6a03..f97b695 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. + +## [0.13.0] -- 2026-07-01 +- Removed peppy from dependencies, using peprs instead +- Bug fixes and speed improvements in project update, validate and upload methods + ## [0.12.4] -- 2026-01-26 - Added project search by tag in annotation module - Updated github actions workflows diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 6dd4954..f23a6b3 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.12.4" +__version__ = "0.13.0" diff --git a/pepdbagent/const.py b/pepdbagent/const.py index 2bf7c93..72b6170 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -24,3 +24,6 @@ DEFAULT_TAG_VERSION = "1.0.0" LATEST_SCHEMA_VERSION = "latest" + +SAMPLE_NAME_ATTR = "sample_name" +SAMPLE_TABLE_INDEX_KEY = "sample_table_index" diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 6259041..8fc75dc 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -11,8 +11,7 @@ SUBSAMPLE_RAW_DICT_KEY, ) -SAMPLE_NAME_ATTR = "sample_name" -SAMPLE_TABLE_INDEX_KEY = "sample_table_index" + from sqlalchemy import Select, and_, delete, select from sqlalchemy.exc import IntegrityError, NoResultFound from sqlalchemy.orm import Session @@ -26,6 +25,8 @@ PEPHUB_SAMPLE_ID_KEY, PKG_NAME, LATEST_SCHEMA_VERSION, + SAMPLE_NAME_ATTR, + SAMPLE_TABLE_INDEX_KEY, ) from pepdbagent.db_utils import ( BaseEngine, @@ -600,9 +601,7 @@ def update( update_values = update_dict else: if "project" in update_dict: - project_dict = update_dict.pop("project").to_dict( - raw=True, by_sample=True - ) + project_dict = update_dict.pop("project").to_dict(raw=True, by_sample=True) update_dict["config"] = project_dict[CONFIG_KEY] update_dict["samples"] = project_dict[SAMPLE_RAW_DICT_KEY] update_dict["subsamples"] = project_dict.get(SUBSAMPLE_RAW_DICT_KEY, []) diff --git a/pepdbagent/modules/sample.py b/pepdbagent/modules/sample.py index c9d12bf..56c1339 100644 --- a/pepdbagent/modules/sample.py +++ b/pepdbagent/modules/sample.py @@ -7,7 +7,7 @@ from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified -from pepdbagent.const import DEFAULT_TAG, PKG_NAME +from pepdbagent.const import DEFAULT_TAG, PKG_NAME, SAMPLE_TABLE_INDEX_KEY from pepdbagent.db_utils import BaseEngine, Projects, Samples from pepdbagent.exceptions import SampleAlreadyExistsError, SampleNotFoundError from pepdbagent.utils import generate_guid, order_samples @@ -144,7 +144,7 @@ def update( sample_mapping.sample.update(update_dict) try: sample_mapping.sample_name = sample_mapping.sample[ - project_mapping.config.get("sample_table_index", "sample_name") + project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, "sample_name") ] except KeyError: raise KeyError( @@ -195,7 +195,7 @@ def add( project_mapping = session.scalar(project_statement) try: sample_name = sample_dict[ - project_mapping.config.get("sample_table_index", "sample_name") + project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, "sample_name") ] except KeyError: raise KeyError( diff --git a/pepdbagent/modules/view.py b/pepdbagent/modules/view.py index 5dd79ea..673e172 100644 --- a/pepdbagent/modules/view.py +++ b/pepdbagent/modules/view.py @@ -385,9 +385,7 @@ def get_snap_view( if raw: return {"config": config, "samples": samples} else: - return peprs.Project.from_dict( - {"config": config, "samples": samples} - ) + return peprs.Project.from_dict({"config": config, "samples": samples}) def get_views_annotation( self, namespace: str, name: str, tag: str = DEFAULT_TAG diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index e6a85bc..6f9874e 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,6 @@ sqlalchemy>=2.0.0 logmuse>=0.2.7 -peprs>=0.1.2 +peprs>=0.2.0 ubiquerg>=0.6.2 coloredlogs>=15.0.1 pytest-mock diff --git a/tests/test_project.py b/tests/test_project.py index c79198a..86ea816 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -93,10 +93,7 @@ def test_get_subsamples(self, namespace, name): ) orgiginal_prj = peprs.Project(get_path_to_example_file(namespace, name)) - assert ( - prj_subtables - == orgiginal_prj.to_dict(raw=True, by_sample=True)["subsamples"] - ) + assert prj_subtables == orgiginal_prj.to_dict(raw=True, by_sample=True)["subsamples"] @pytest.mark.parametrize( "namespace, name", @@ -111,10 +108,7 @@ def test_get_samples_raw(self, namespace, name): ) orgiginal_prj = peprs.Project(get_path_to_example_file(namespace, name)) - assert ( - prj_samples - == orgiginal_prj.to_dict(raw=True, by_sample=True)["samples"] - ) + assert prj_samples == orgiginal_prj.to_dict(raw=True, by_sample=True)["samples"] @pytest.mark.parametrize( "namespace, name", @@ -131,18 +125,13 @@ def test_get_samples_processed(self, namespace, name): raw=False, ) orgiginal_prj = peprs.Project(get_path_to_example_file(namespace, name)) - expected = ( - orgiginal_prj.to_pandas().replace({np.nan: None}).to_dict(orient="records") - ) + expected = orgiginal_prj.to_pandas().replace({np.nan: None}).to_dict(orient="records") # Normalize numpy arrays (used for subsample list columns) to plain lists # so dict equality works without raising "truth value is ambiguous". def _normalize(samples): return [ - { - k: (v.tolist() if isinstance(v, np.ndarray) else v) - for k, v in s.items() - } + {k: (v.tolist() if isinstance(v, np.ndarray) else v) for k, v in s.items()} for s in samples ]