Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions S3MP/mirror_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,16 @@ def download_to_mirror_if_not_present(self):
self.download_to_mirror(overwrite=False)

def upload_from_mirror(self, overwrite: bool = False):
"""Upload local file to S3."""
if not overwrite and self.exists_on_s3():
self.update_callback_on_skipped_transfer()
return
upload_to_key(self.s3_key, self.local_path, self.bucket, self.s3_client)
"""Upload local file or folder to S3."""
if self.is_file:
if not overwrite and self.exists_on_s3():
self.update_callback_on_skipped_transfer()
return
upload_to_key(self.s3_key, self.local_path, self.bucket, self.s3_client)
else:
for child in self.local_path.iterdir():
child_mp = self.get_child(child.name)
child_mp.upload_from_mirror(overwrite=overwrite)

def upload_from_mirror_if_not_present(self):
"""Upload from mirror if not present on S3."""
Expand Down
16 changes: 12 additions & 4 deletions S3MP/utils/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ def upload_to_key(
Config=S3MPConfig.transfer_config, # type: ignore[arg-type]
)
else:
folder_key = key.rstrip("/")
for child in local_path.iterdir():
upload_to_key(f"{key}/{child.name}", child, bucket, client)
upload_to_key(f"{folder_key}/{child.name}", child, bucket, client)


def key_exists_on_s3(
Expand Down Expand Up @@ -180,8 +181,13 @@ def delete_child_keys_on_s3(
client = client or S3MPConfig.s3_client
assert bucket is not None
assert client is not None
for child_key in s3_list_child_keys(key, bucket, client):
client.delete_object(Bucket=bucket.name, Key=child_key)
folder_key = key if key.endswith("/") else f"{key}/"
for child_key in s3_list_child_keys(folder_key, bucket, client):
if child_key.endswith("/"):
delete_child_keys_on_s3(child_key, bucket, client)
client.delete_object(Bucket=bucket.name, Key=child_key)
else:
client.delete_object(Bucket=bucket.name, Key=child_key)


def delete_key_on_s3(
Expand All @@ -199,4 +205,6 @@ def delete_key_on_s3(
if key_is_file_on_s3(key, bucket, client):
client.delete_object(Bucket=bucket.name, Key=key)
else:
delete_child_keys_on_s3(key, bucket, client)
folder_key = key if key.endswith("/") else f"{key}/"
delete_child_keys_on_s3(folder_key, bucket, client)
client.delete_object(Bucket=bucket.name, Key=folder_key)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "S3MP"
version = "0.9.0"
version = "0.9.1"
description = ""
authors = [
{name = "Joshua Dean", email = "joshua.dean@sentera.com"},
Expand Down
79 changes: 77 additions & 2 deletions tests/test_mirror_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from S3MP.global_config import S3MPConfig
from S3MP.mirror_path import MirrorPath
from S3MP.types import S3Client
from S3MP.utils.s3_utils import delete_key_on_s3

TESTING_BUCKET_NAME = "s3mp-testing"

Expand All @@ -34,11 +35,15 @@ def _configure_s3mp(tmp_path):

@pytest.fixture()
def s3_client():
"""Create an S3 client and ensure the test bucket exists."""
"""Create an S3 client, ensure the test bucket exists, and clean up after the test."""
client: S3Client = boto3.client("s3")
with contextlib.suppress(client.exceptions.BucketAlreadyOwnedByYou):
client.create_bucket(Bucket=TESTING_BUCKET_NAME)
return client
yield client
# Teardown: delete test data from S3 (local mirror is cleaned up by tmp_path)
bucket = boto3.resource("s3").Bucket(TESTING_BUCKET_NAME)
for prefix in ["mp_test/", "mr_prop/"]:
delete_key_on_s3(prefix, bucket, client)


# ── from_s3_key construction ──────────────────────────────────────────────
Expand Down Expand Up @@ -666,6 +671,76 @@ def test_copy_to_mp_from_mirror(self, s3_client, tmp_path):
assert mp_dest.exists_on_s3()
assert mp_dest.local_path.read_bytes() == b"mirror_src_data"

def test_upload_from_mirror_folder_uploads_children(self, s3_client, tmp_path):
"""upload_from_mirror on a folder should upload all child files."""
folder_mp = MirrorPath.from_s3_key("mp_test/upload_folder/")
folder_mp.local_path.mkdir(parents=True, exist_ok=True)
(folder_mp.local_path / "x.txt").write_bytes(b"x_data")
(folder_mp.local_path / "y.txt").write_bytes(b"y_data")

folder_mp.upload_from_mirror(overwrite=True)

child_x = folder_mp.get_child("x.txt")
child_y = folder_mp.get_child("y.txt")
assert child_x.exists_on_s3()
assert child_y.exists_on_s3()

def test_upload_from_mirror_folder_no_double_slash(self, s3_client, tmp_path):
"""upload_from_mirror on a folder must not produce double-slash S3 keys."""
folder_mp = MirrorPath.from_s3_key("mp_test/up_nods/")
folder_mp.local_path.mkdir(parents=True, exist_ok=True)
(folder_mp.local_path / "file.txt").write_bytes(b"data")

folder_mp.upload_from_mirror(overwrite=True)

child_mp = folder_mp.get_child("file.txt")
assert "//" not in child_mp.s3_key
assert child_mp.exists_on_s3()

def test_upload_from_mirror_folder_nested(self, s3_client, tmp_path):
"""upload_from_mirror recursively uploads nested subdirectories."""
folder_mp = MirrorPath.from_s3_key("mp_test/up_nested/")
folder_mp.local_path.mkdir(parents=True, exist_ok=True)
(folder_mp.local_path / "top.txt").write_bytes(b"top")
sub = folder_mp.local_path / "sub"
sub.mkdir()
(sub / "deep.txt").write_bytes(b"deep")

folder_mp.upload_from_mirror(overwrite=True)

top_mp = folder_mp.get_child("top.txt")
sub_mp = folder_mp.get_child("sub")
deep_mp = sub_mp.get_child("deep.txt")
assert top_mp.exists_on_s3()
assert deep_mp.exists_on_s3()

def test_upload_from_mirror_folder_overwrite_false_still_uploads_new_children(
self, s3_client, tmp_path
):
"""When overwrite=False and the folder prefix exists, new child files are still uploaded."""
# Upload one child first so the prefix exists on S3
folder_mp = MirrorPath.from_s3_key("mp_test/up_partial/")
folder_mp.local_path.mkdir(parents=True, exist_ok=True)

existing_mp = folder_mp.get_child("existing.txt")
existing_mp.local_path.parent.mkdir(parents=True, exist_ok=True)
existing_mp.local_path.write_bytes(b"original")
existing_mp.upload_from_mirror(overwrite=True)
assert existing_mp.exists_on_s3()

# Now add a new file locally and re-upload with overwrite=False
(folder_mp.local_path / "new_file.txt").write_bytes(b"new_data")

folder_mp.upload_from_mirror(overwrite=False)

# The new file should have been uploaded
new_mp = folder_mp.get_child("new_file.txt")
assert new_mp.exists_on_s3()

# The existing file should not have been re-uploaded (still original)
existing_mp.download_to_mirror(overwrite=True)
assert existing_mp.local_path.read_bytes() == b"original"


def _write_text(path: str, data: str):
"""Helper: write text to a file."""
Expand Down
6 changes: 5 additions & 1 deletion tests/test_prefix_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
get_folders_within_folder,
)
from S3MP.types import S3Client
from S3MP.utils.s3_utils import delete_key_on_s3

TESTING_BUCKET_NAME = "s3mp-testing"

Expand All @@ -32,7 +33,10 @@ def s3_env():
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="pq_test/sub_one/", Body=b"")
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="pq_test/sub_one/nested.txt", Body=b"n")
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="pq_test/sub_two/", Body=b"")
return client
yield client
# Teardown
bucket = boto3.resource("s3").Bucket(TESTING_BUCKET_NAME)
delete_key_on_s3("pq_test/", bucket, client)


# ── get_files_within_folder ───────────────────────────────────────────────
Expand Down
75 changes: 73 additions & 2 deletions tests/test_s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ def test_folders_files():
for key in keys_that_are_folders:
assert not key_is_file_on_s3(key, bucket, client)

# Cleanup
delete_key_on_s3("test_folder/", bucket, client)


if __name__ == "__main__":
test_folders_files()
Expand All @@ -151,12 +154,26 @@ def test_folders_files():

@pytest.fixture()
def s3_env():
"""Create a client, resource, and bucket for S3 tests."""
"""Create a client, resource, and bucket for S3 tests; clean up afterward."""
client: S3Client = boto3.client("s3")
with contextlib.suppress(client.exceptions.BucketAlreadyOwnedByYou):
client.create_bucket(Bucket=TESTING_BUCKET_NAME)
bucket = boto3.resource("s3").Bucket(TESTING_BUCKET_NAME)
return client, bucket
yield client, bucket
# Teardown: delete all objects under known test prefixes
for prefix in [
"single/",
"single_multi/",
"size/",
"dl_up/",
"up_folder/",
"up_nested/",
"del/",
"del_folder/",
"del_nested/",
"del_children/",
]:
delete_key_on_s3(prefix, bucket, client)


# ── s3_list_single_key ────────────────────────────────────────────────────
Expand Down Expand Up @@ -242,6 +259,39 @@ def test_download_nonexistent_raises(self, s3_env, tmp_path):
with pytest.raises(ValueError, match="does not exist"):
download_key("dl_up/no_such.txt", tmp_path / "out.txt", bucket, client)

def test_upload_folder_no_double_slash(self, s3_env, tmp_path):
"""upload_to_key with a trailing-slash key must not produce double slashes."""
client, bucket = s3_env
folder = tmp_path / "my_folder"
folder.mkdir()
(folder / "a.txt").write_bytes(b"aaa")
(folder / "b.txt").write_bytes(b"bbb")

upload_to_key("up_folder/", folder, bucket, client)

# Keys should NOT contain "//"
assert key_exists_on_s3("up_folder/a.txt", bucket, client)
assert key_exists_on_s3("up_folder/b.txt", bucket, client)
assert not key_exists_on_s3("up_folder//a.txt", bucket, client)

def test_upload_nested_folder(self, s3_env, tmp_path):
"""upload_to_key recursively uploads nested directories."""
client, bucket = s3_env
root = tmp_path / "nested_root"
root.mkdir()
(root / "top.txt").write_bytes(b"top")
sub = root / "sub"
sub.mkdir()
(sub / "deep.txt").write_bytes(b"deep")

upload_to_key("up_nested/", root, bucket, client)

assert key_exists_on_s3("up_nested/top.txt", bucket, client)
assert key_exists_on_s3("up_nested/sub/deep.txt", bucket, client)
# No double slashes
assert not key_exists_on_s3("up_nested//top.txt", bucket, client)
assert not key_exists_on_s3("up_nested//sub/deep.txt", bucket, client)


# ── delete_key_on_s3 / delete_child_keys_on_s3 ───────────────────────────

Expand Down Expand Up @@ -269,6 +319,27 @@ def test_delete_folder_removes_children(self, s3_env):
delete_key_on_s3("del_folder/", bucket, client)
assert not key_exists_on_s3("del_folder/a.txt", bucket, client)
assert not key_exists_on_s3("del_folder/b.txt", bucket, client)
assert not key_exists_on_s3("del_folder/", bucket, client)

def test_delete_folder_removes_nested_children(self, s3_env):
"""delete_key_on_s3 must recursively remove nested subfolders and their contents."""
client, bucket = s3_env
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/", Body=b"")
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/top.txt", Body=b"top")
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/", Body=b"")
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/deep.txt", Body=b"deep")
client.put_object(Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/deeper/", Body=b"")
client.put_object(
Bucket=TESTING_BUCKET_NAME, Key="del_nested/sub/deeper/bottom.txt", Body=b"bottom"
)

delete_key_on_s3("del_nested/", bucket, client)

assert not key_exists_on_s3("del_nested/top.txt", bucket, client)
assert not key_exists_on_s3("del_nested/sub/deep.txt", bucket, client)
assert not key_exists_on_s3("del_nested/sub/deeper/bottom.txt", bucket, client)
assert not key_exists_on_s3("del_nested/sub/", bucket, client)
assert not key_exists_on_s3("del_nested/", bucket, client)

def test_delete_child_keys(self, s3_env):
client, bucket = s3_env
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading