Skip to content
Merged

Peprs #455

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions pephub/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,20 @@
import pandas as pd
from fastapi import __version__ as fastapi_version
from pepdbagent import __version__ as pepdbagent_version
from peppy import __version__ as peppy_version
from peppy.const import PEP_LATEST_VERSION

from ._version import __version__ as pephub_version

# peprs has no __version__ attribute and no PEP_LATEST_VERSION constant.
PEP_LATEST_VERSION = "2.1.0"
peprs_version = "unknown"

PKG_NAME = "pephub"
DATA_REPO = "https://github.com/pepkit/data.pephub.git"


ALL_VERSIONS = {
"pephub_version": pephub_version,
"peppy_version": peppy_version,
"peprs_version": peprs_version,
"python_version": python_version(),
"fastapi_version": fastapi_version,
"pepdbagent_version": pepdbagent_version,
Expand Down
20 changes: 11 additions & 9 deletions pephub/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@
import json
from fastapi import Response, UploadFile
from fastapi.exceptions import HTTPException
from peppy.const import (
CFG_SAMPLE_TABLE_KEY,
CFG_SUBSAMPLE_TABLE_KEY,
from peprs.const import (
CONFIG_KEY,
NAME_KEY,
SAMPLE_RAW_DICT_KEY,
SUBSAMPLE_RAW_LIST_KEY,
SUBSAMPLE_RAW_DICT_KEY,
)
from .const import JWT_EXPIRATION, JWT_SECRET

# peprs.const does not export these — they are PEP config schema strings.
CFG_SAMPLE_TABLE_KEY = "sample_table"
CFG_SUBSAMPLE_TABLE_KEY = "subsample_table"
NAME_KEY = "name"


def jwt_encode_user_data(user_data: dict, exp: datetime = None) -> str:
"""
Expand Down Expand Up @@ -53,15 +55,15 @@ def zip_pep(project: Dict[str, Any]) -> Response:
project[SAMPLE_RAW_DICT_KEY]
).to_csv(index=False)

if project[SUBSAMPLE_RAW_LIST_KEY] is not None:
if not isinstance(project[SUBSAMPLE_RAW_LIST_KEY], list):
if project[SUBSAMPLE_RAW_DICT_KEY] is not None:
if not isinstance(project[SUBSAMPLE_RAW_DICT_KEY], list):
config[CFG_SUBSAMPLE_TABLE_KEY] = ["subsample_table1.csv"]
content_to_zip["subsample_table1.csv"] = pd.DataFrame(
project[SUBSAMPLE_RAW_LIST_KEY]
project[SUBSAMPLE_RAW_DICT_KEY]
).to_csv(index=False)
else:
config[CFG_SUBSAMPLE_TABLE_KEY] = []
for number, file in enumerate(project[SUBSAMPLE_RAW_LIST_KEY]):
for number, file in enumerate(project[SUBSAMPLE_RAW_DICT_KEY]):
file_name = f"subsample_table{number + 1}.csv"
config[CFG_SUBSAMPLE_TABLE_KEY].append(file_name)
content_to_zip[file_name] = pd.DataFrame(file).to_csv(index=False)
Expand Down
6 changes: 3 additions & 3 deletions pephub/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
fmt="[%(levelname)s] [%(asctime)s] [PEPDBAGENT] %(message)s",
)

_LOGGER_PEPPY = logging.getLogger("peppy")
_LOGGER_PEPRS = logging.getLogger("peprs")
coloredlogs.install(
logger=_LOGGER_PEPPY,
logger=_LOGGER_PEPRS,
level=logging.ERROR,
datefmt="%b %d %Y %H:%M:%S",
fmt="[%(levelname)s] [%(asctime)s] [PEPPY] %(message)s",
fmt="[%(levelname)s] [%(asctime)s] [PEPRS] %(message)s",
)

_LOGGER_PEPHUB = logging.getLogger("uvicorn.access")
Expand Down
54 changes: 21 additions & 33 deletions pephub/routers/api/v1/helpers.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
import logging

import eido
from eido.validation import validate_config
from eido.exceptions import EidoValidationError
import peppy
import peprs
import yaml
from fastapi.exceptions import HTTPException
from peppy import Project
from peppy.const import (
from peprs import Project
from peprs.eido import EidoValidationError, validate_config, validate_project
from peprs.const import (
CONFIG_KEY,
SAMPLE_RAW_DICT_KEY,
SUBSAMPLE_RAW_LIST_KEY,
SUBSAMPLE_RAW_DICT_KEY,
)
from ....dependencies import (
get_db,
Expand All @@ -22,7 +20,7 @@
DEFAULT_SCHEMA_VERSION = "2.1.0"


async def verify_updated_project(updated_project) -> peppy.Project:
async def verify_updated_project(updated_project) -> peprs.Project:
new_raw_project = {}

agent = get_db()
Expand All @@ -37,43 +35,23 @@ async def verify_updated_project(updated_project) -> peppy.Project:
status_code=400,
detail="Please provide a sample table and project config yaml to update project",
)
try:
validate_config(
yaml.safe_load(updated_project.project_config_yaml), default_schema
)
except EidoValidationError as e:
raise HTTPException(
status_code=400,
detail=f"Config structure error: {', '.join(list(e.errors_by_type.keys()))}. Please check schema definition and try again.",
)
# sample table update
new_raw_project[SAMPLE_RAW_DICT_KEY] = updated_project.sample_table

try:
yaml_dict = yaml.safe_load(updated_project.project_config_yaml)
new_raw_project[CONFIG_KEY] = yaml_dict
except yaml.scanner.ScannerError as e:
raise HTTPException(
status_code=400,
detail=f"Could not parse provided yaml. Error: {e}",
)

# sample_table_index_col = yaml_dict.get(
# SAMPLE_TABLE_INDEX_KEY, SAMPLE_NAME_ATTR # default to sample_name
# )

# await check_sample_names(
# new_raw_project[SAMPLE_RAW_DICT_KEY], sample_table_index_col
# )
new_raw_project[CONFIG_KEY] = yaml_dict
new_raw_project[SAMPLE_RAW_DICT_KEY] = updated_project.sample_table

# subsample table update
if updated_project.subsample_tables is not None:
subsamples = list(updated_project.subsample_tables[0][0].values())
new_raw_project[SUBSAMPLE_RAW_LIST_KEY] = (
updated_project.subsample_tables
if len(subsamples) > 0 and subsamples[0]
else None
)
if len(subsamples) > 0 and subsamples[0]:
new_raw_project[SUBSAMPLE_RAW_DICT_KEY] = updated_project.subsample_tables

try:
new_project = Project.from_dict(new_raw_project)
Expand All @@ -83,9 +61,19 @@ async def verify_updated_project(updated_project) -> peppy.Project:
detail=f"Could not create PEP from provided data. Error: {e}",
)

# peprs.eido.validate_config takes a Project (not a raw dict like eido did),
# so we validate after constructing the Project.
try:
validate_config(new_project, default_schema)
except EidoValidationError as e:
raise HTTPException(
status_code=400,
detail=f"Config structure error: {', '.join(list(e.errors_by_type.keys()))}. Please check schema definition and try again.",
)

try:
# validate project (it will also validate samples)
eido.validate_project(new_project, default_schema)
validate_project(new_project, default_schema)
except Exception as _:
raise HTTPException(
status_code=400,
Expand Down
27 changes: 16 additions & 11 deletions pephub/routers/api/v1/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import List, Literal, Optional, Union
import os

import peppy
import peprs
from dotenv import load_dotenv
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, Request
from fastapi.responses import JSONResponse
Expand All @@ -22,10 +22,13 @@
NamespaceStats,
TarNamespaceModelReturn,
)
from peppy import Project
from peppy.const import DESC_KEY, NAME_KEY
from peprs import Project
from typing_extensions import Annotated

# peprs.const does not export these — they are PEP config schema strings.
NAME_KEY = "name"
DESC_KEY = "description"

from ....const import (
DEFAULT_TAG,
ARCHIVE_URL_PATH,
Expand Down Expand Up @@ -213,7 +216,7 @@ async def create_pep(
},
status_code=202,
)
# create a blank peppy.Project object with fake files
# create a blank peprs.Project object with fake files
else:
raise HTTPException(
detail="Project files were not provided",
Expand Down Expand Up @@ -258,21 +261,23 @@ async def upload_raw_pep(
# This configuration is needed due to Issue #124. It should be removed in the future.
project_dict = ProjectRawModel(**project_from_json.pep_dict.dict())
ff = project_dict.model_dump(by_alias=True)
p_project = peppy.Project().from_dict(ff)
p_project = peprs.Project.from_dict(ff)

p_project.namespace = name
# peprs.Project has no `namespace` attribute, so we set the registry name
# in the config and pass `name` separately to agent.project.create.
p_project.name = name
p_project.description = description

except Exception as e:
raise HTTPException(
detail=f"Incorrect raw project was provided. Couldn't initiate peppy object: {e}",
detail=f"Incorrect raw project was provided. Couldn't initiate peprs object: {e}",
status_code=417,
)
try:
agent.project.create(
p_project,
namespace=namespace,
name=p_project.namespace,
name=name,
tag=tag,
description=description,
is_private=is_private,
Expand All @@ -282,15 +287,15 @@ async def upload_raw_pep(
)
except ProjectUniqueNameError:
raise HTTPException(
detail=f"Project '{namespace}/{p_project.namespace}:{tag}' already exists in namespace",
detail=f"Project '{namespace}/{name}:{tag}' already exists in namespace",
status_code=400,
)
return JSONResponse(
content={
"namespace": namespace,
"name": p_project.namespace,
"name": name,
"tag": tag,
"registry_path": f"{namespace}/{p_project.namespace}:{tag}",
"registry_path": f"{namespace}/{name}:{tag}",
},
status_code=202,
)
Expand Down
56 changes: 29 additions & 27 deletions pephub/routers/api/v1/project.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import logging
from typing import Annotated, Any, Literal, Dict, List, Optional, Union

import eido
import numpy as np
import pandas as pd
import peppy
import peprs
import yaml
from dotenv import load_dotenv
from fastapi import APIRouter, Body, Depends, Query
Expand All @@ -29,7 +28,7 @@
ProjectViews,
HistoryAnnotationModel,
)
from peppy.const import SAMPLE_RAW_DICT_KEY
from peprs.const import SAMPLE_RAW_DICT_KEY

# from ....const import SAMPLE_CONVERSION_FUNCTIONS
from ....dependencies import (
Expand Down Expand Up @@ -262,25 +261,23 @@ async def get_pep_samples(
)

if isinstance(proj, dict):
if len(proj["_sample_dict"]) > MAX_PROCESSED_PROJECT_SIZE:
if len(proj[SAMPLE_RAW_DICT_KEY]) > MAX_PROCESSED_PROJECT_SIZE:
raise HTTPException(
status_code=400,
detail=f"Project is too large. View raw samples, or create a view. Limit is {MAX_PROCESSED_PROJECT_SIZE} samples.",
)
proj = peppy.Project.from_dict(proj)
proj = peprs.Project.from_dict(proj)

if format == "json":
return {
"samples": [sample.to_dict() for sample in proj.samples],
}
elif format == "csv":
return PlainTextResponse(eido.convert_project(proj, "csv")["samples"])
return PlainTextResponse(proj.to_csv_string())
elif format == "yaml":
return PlainTextResponse(
eido.convert_project(proj, "yaml-samples")["samples"]
)
return PlainTextResponse(proj.to_yaml_string())
elif format == "basic":
return eido.convert_project(proj, "basic")
return proj.to_dict()

if raw:
df = pd.DataFrame(proj[SAMPLE_RAW_DICT_KEY])
Expand All @@ -289,12 +286,12 @@ async def get_pep_samples(
items=df.replace({np.nan: None}).to_dict(orient="records"),
)
if isinstance(proj, dict):
if len(proj["_sample_dict"]) > MAX_PROCESSED_PROJECT_SIZE:
if len(proj[SAMPLE_RAW_DICT_KEY]) > MAX_PROCESSED_PROJECT_SIZE:
raise HTTPException(
status_code=400,
detail=f"Project is too large. View raw samples, or create a view. Limit is {MAX_PROCESSED_PROJECT_SIZE} samples.",
)
proj = peppy.Project.from_dict(proj)
proj = peprs.Project.from_dict(proj)
return [sample.to_dict() for sample in proj.samples]


Expand Down Expand Up @@ -500,7 +497,7 @@ async def delete_sample(

@project.get("/subsamples", response_model=SamplesResponseModel)
async def get_subsamples_endpoint(
subsamples: peppy.Project = Depends(get_subsamples),
subsamples: list = Depends(get_subsamples),
download: bool = False,
):
"""
Expand Down Expand Up @@ -543,11 +540,8 @@ async def convert_pep(
format: Optional[str] = "plain",
):
"""
Convert a PEP to a specific format, f. For a list of available formats/filters,
see /eido/filters.

See, http://eido.databio.org/en/latest/filters/#convert-a-pep-into-an-alternative-format-with-a-filter
for more information.
Convert a PEP to a specific format. Supported filters are: basic, csv, yaml,
yaml-samples, json.

Don't have a namespace, or project?

Expand All @@ -559,18 +553,26 @@ async def convert_pep(
"""
# default to basic
if filter is None:
filter = "basic" # default to basic
filter = "basic"

# validate filter exists
filter_list = eido.get_available_pep_filters()
if filter not in filter_list:
# eido filter infrastructure is not in peprs; emulate the previously supported
# filters using peprs Project conversion methods.
available_filters = ["basic", "csv", "yaml", "yaml-samples", "json"]
if filter not in available_filters:
raise HTTPException(
400, f"Unknown filter '{filter}'. Available filters: {filter_list}"
400, f"Unknown filter '{filter}'. Available filters: {available_filters}"
)

# generate result
peppy_project = peppy.Project.from_dict(proj)
conv_result = eido.run_filter(peppy_project, filter, verbose=False)
peprs_project = peprs.Project.from_dict(proj)

if filter == "basic":
conv_result = {"project_config.yaml": peprs_project.to_yaml_string()}
elif filter == "csv":
conv_result = {"sample_table.csv": peprs_project.to_csv_string()}
elif filter in ("yaml", "yaml-samples"):
conv_result = {"sample_table.yaml": peprs_project.to_yaml_string()}
else: # json
conv_result = {"project.json": peprs_project.to_json_string()}

if format == "plain":
return_str = "\n".join([conv_result[k] for k in conv_result])
Expand Down Expand Up @@ -996,7 +998,7 @@ def get_project_history_by_id(
with_id=True,
)
# convert the config to a yaml string
project_at_history["_config"] = yaml.dump(project_at_history["_config"])
project_at_history["config"] = yaml.dump(project_at_history["config"])
return project_at_history

except ProjectNotFoundError:
Expand Down
Loading
Loading