Skip to content

Commit d57031d

Browse files
committed
Merge branch 'trs/remote/download/not-in-manifest'
2 parents cc0036b + ad276fc commit d57031d

2 files changed

Lines changed: 82 additions & 33 deletions

File tree

CHANGES.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,22 @@ This release drops support for Python versions 3.6 and 3.7 and adds support for
6565
the failure.
6666
([#341](https://github.com/nextstrain/cli/pull/341))
6767

68+
* `nextstrain remote download` now supports downloading core datasets which are
69+
only visible on the web by direct access via their URL. For example, the
70+
following now work (where in previous versions they did not):
71+
72+
nextstrain remote download https://nextstrain.org/nextclade/sars-cov-2/21L
73+
nextstrain remote download https://nextstrain.org/enterovirus/d68/vp1/2020-01-23
74+
75+
This support also covers [past snapshots of
76+
datasets](https://docs.nextstrain.org/en/latest/guides/versions.html), which
77+
is a recently added feature of nextstrain.org. For example:
78+
79+
nextstrain remote download https://nextstrain.org/flu/seasonal/h3n2/ha/6y@2023-07-01
80+
nextstrain remote download https://nextstrain.org/ncov/gisaid/21L/global/6m@2024-01-09
81+
82+
([#345](https://github.com/nextstrain/cli/pull/345))
83+
6884
## Bug fixes
6985

7086
* Commands making use of user authentication (e.g. `nextstrain login`,

nextstrain/cli/remote/nextstrain_dot_org.py

Lines changed: 66 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -103,16 +103,15 @@ class NormalizedPath(PurePosixPath):
103103

104104
class Resource:
105105
"""
106-
Base class for a remote Nextstrain resource, as described by a Charon API
107-
"getAvailable" response.
106+
Base class for a remote Nextstrain resource described by its *path*.
108107
109108
Concretely, either a :class:`Dataset` or :class:`Narrative` currently.
110109
"""
111110
path: NormalizedPath
112111
subresources: List['SubResource']
113112

114-
def __init__(self, api_item: dict):
115-
self.path = normalize_path(api_item["request"])
113+
def __init__(self, path: str):
114+
self.path = normalize_path(path)
116115

117116

118117
class SubResource(NamedTuple):
@@ -133,45 +132,44 @@ class SubResource(NamedTuple):
133132
file_extension: str
134133
primary: bool = False
135134

135+
def __str__(self) -> str:
136+
type, subtype = self.media_type.split("/", 1)
137+
subtype_sans_suffix, *_ = subtype.split("+", 1)
138+
subtype_tree = tuple(subtype_sans_suffix.split("."))
139+
140+
resource = (
141+
"dataset" if subtype_tree[0:3] == ("vnd", "nextstrain", "dataset") else
142+
"narrative" if subtype_tree[0:3] == ("vnd", "nextstrain", "narrative") else
143+
self.media_type
144+
)
145+
146+
sidecar = sidecar_suffix(self.media_type)
147+
148+
return f"{resource} ({sidecar})" if sidecar else resource
149+
136150

137151
class Dataset(Resource):
138152
"""
139-
A remote Nextstrain dataset, as described by a Charon API response,
140-
extended for the nextstrain.org RESTful API.
153+
A remote Nextstrain dataset as described by its *path* and an optional list of
154+
*sidecars*.
141155
"""
142-
def __init__(self, api_item):
143-
super().__init__(api_item)
156+
def __init__(self, path: str, sidecars: Optional[List[str]] = None):
157+
super().__init__(path)
144158

145-
default_sidecars = ["root-sequence", "tip-frequencies", "measurements"]
159+
if sidecars is None:
160+
sidecars = ["root-sequence", "tip-frequencies", "measurements"]
146161

147162
self.subresources = [
148163
SubResource("application/vnd.nextstrain.dataset.main+json", ".json", primary = True),
149164

150-
# XXX TODO: The "sidecars" field in the /charon/getAvailable API
151-
# response doesn't actually exist yet and its use here is
152-
# prospective.
153-
#
154-
# I plan to extend the /charon/getAvailable API endpoint (or maybe
155-
# switch to a new endpoint) in the future to include the "sidecars"
156-
# field listing the available sidecars for each dataset, so that
157-
# this code only has to try to fetch what is reported to exist.
158-
# More than just reducing requests, the primary upshot is looser
159-
# coupling by avoiding the need to update the hardcoded list of
160-
# sidecars here and get people to upgrade their installed version
161-
# of this CLI if we add a new sidecar in the future. Other API
162-
# clients would also likely benefit.
163-
#
164-
# -trs, 18 August 2021
165-
#
166165
*[SubResource(f"application/vnd.nextstrain.dataset.{type}+json", ".json")
167-
for type in api_item.get("sidecars", default_sidecars)],
166+
for type in sidecars],
168167
]
169168

170169

171170
class Narrative(Resource):
172171
"""
173-
A remote Nextstrain narrative, as described by a Charon API response,
174-
extended for the nextstrain.org RESTful API.
172+
A remote Nextstrain narrative as described by its *path*.
175173
"""
176174
subresources = [
177175
SubResource("text/vnd.nextstrain.narrative+markdown", ".md", primary = True),
@@ -344,7 +342,18 @@ def download(url: URL, local_path: Path, recursively: bool = False, dry_run: boo
344342
with requests.Session() as http:
345343
http.auth = auth(origin)
346344

347-
resources = _ls(origin, path, recursively = recursively, http = http)
345+
if recursively:
346+
resources = _ls(origin, path, recursively = recursively, http = http)
347+
else:
348+
# Avoid the query and just try to download the single resource.
349+
# This saves a request for single-dataset (or narrative) downloads,
350+
# but also allows downloading core datasets which aren't in the
351+
# manifest. (At least until the manifest goes away.)
352+
# -trs, 9 Nov 2022
353+
if narratives_only(path):
354+
resources = [Narrative(str(path))]
355+
else:
356+
resources = [Dataset(str(path))]
348357

349358
if not resources:
350359
raise UserError(f"Path {path} does not seem to exist")
@@ -369,7 +378,9 @@ def download(url: URL, local_path: Path, recursively: bool = False, dry_run: boo
369378

370379
# Check for bad response
371380
raise_for_status(response)
372-
assert content_media_type(response) == subresource.media_type
381+
382+
if content_media_type(response) != subresource.media_type:
383+
raise UserError(f"Path {path} does not seem to be a {subresource}.")
373384

374385
# Local destination
375386
if local_path.is_dir():
@@ -442,9 +453,31 @@ def matches_path(x: Resource):
442453
else:
443454
return x.path == path
444455

456+
def to_dataset(api_item: dict) -> Dataset:
457+
# XXX TODO: The "sidecars" field in the /charon/getAvailable API
458+
# response doesn't actually exist yet and its use here is
459+
# prospective.
460+
#
461+
# I plan to extend the /charon/getAvailable API endpoint (or maybe
462+
# switch to a new endpoint) in the future to include the "sidecars"
463+
# field listing the available sidecars for each dataset, so that
464+
# this code only has to try to fetch what is reported to exist.
465+
# More than just reducing requests, the primary upshot is looser
466+
# coupling by avoiding the need to update the hardcoded list of
467+
# sidecars here and get people to upgrade their installed version
468+
# of this CLI if we add a new sidecar in the future. Other API
469+
# clients would also likely benefit.
470+
#
471+
# -trs, 18 August 2021
472+
#
473+
return Dataset(api_item["request"], api_item.get("sidecars"))
474+
475+
def to_narrative(api_item: dict) -> Narrative:
476+
return Narrative(api_item["request"])
477+
445478
return [
446-
*filter(matches_path, map(Dataset, available["datasets"])),
447-
*filter(matches_path, map(Narrative, available["narratives"])),
479+
*filter(matches_path, map(to_dataset, available["datasets"])),
480+
*filter(matches_path, map(to_narrative, available["narratives"])),
448481
]
449482

450483

@@ -649,7 +682,7 @@ def api_endpoint(origin: Origin, path: Union[str, PurePosixPath]) -> str:
649682
>>> api_endpoint(URL("http://localhost:5000/x/").origin, "a/b/c")
650683
'http://localhost:5000/a/b/c'
651684
"""
652-
return origin + "/" + urlquote(str(path).lstrip("/"))
685+
return origin + "/" + urlquote(str(path).lstrip("/"), safe = "/@")
653686

654687

655688
class auth(requests.auth.AuthBase):

0 commit comments

Comments
 (0)