Skip to content

Commit 6b22574

Browse files
committed
also retry index download on exists-in-archive, change locking
1 parent 5eed146 commit 6b22574

File tree

1 file changed

+32
-52
lines changed

1 file changed

+32
-52
lines changed

src/storage/mod.rs

Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -383,19 +383,29 @@ impl AsyncStorage {
383383
latest_build_id: Option<BuildId>,
384384
path: &str,
385385
) -> Result<bool> {
386-
match self
387-
.find_in_archive_index(archive_path, latest_build_id, path)
388-
.await
389-
{
390-
Ok(file_info) => Ok(file_info.is_some()),
391-
Err(err) => {
392-
if err.downcast_ref::<PathNotFoundError>().is_some() {
393-
Ok(false)
394-
} else {
395-
Err(err)
386+
for attempt in 0..2 {
387+
match self
388+
.find_in_archive_index(archive_path, latest_build_id, path)
389+
.await
390+
{
391+
Ok(file_info) => return Ok(file_info.is_some()),
392+
Err(err) if err.downcast_ref::<PathNotFoundError>().is_some() => {
393+
return Ok(false);
394+
}
395+
Err(err) if attempt == 0 => {
396+
warn!(
397+
?err,
398+
"error fetching range from archive, purging local index cache and retrying once"
399+
);
400+
self.purge_archive_index_cache(archive_path, latest_build_id)
401+
.await?;
402+
403+
continue;
396404
}
405+
Err(err) => return Err(err),
397406
}
398407
}
408+
unreachable!("exists_in_archive retry loop exited unexpectedly");
399409
}
400410

401411
/// get, decompress and materialize an object from store
@@ -557,54 +567,24 @@ impl AsyncStorage {
557567
}
558568

559569
let lock = self.local_index_cache_lock(&local_index_path);
570+
let write_guard = lock.lock().await;
560571

561-
// At this point we know the index is missing or broken.
562-
// Try to become the "downloader" without queueing as a writer.
563-
if let Ok(write_guard) = lock.try_lock() {
564-
// Double-check: maybe someone fixed it between our first failure and now.
565-
if let Ok(res) = archive_index::find_in_file(&local_index_path, path_in_archive).await {
566-
return Ok(res);
567-
}
568-
569-
let remote_index_path = format!("{archive_path}.{ARCHIVE_INDEX_FILE_EXTENSION}");
570-
571-
// We are the repairer: download fresh index into place.
572-
self.download_archive_index(&local_index_path, &remote_index_path)
573-
.await?;
574-
575-
// Write lock is dropped here (end of scope), so others can proceed.
576-
drop(write_guard);
577-
578-
// Final attempt: if this still fails, bubble the error.
579-
return archive_index::find_in_file(local_index_path, path_in_archive).await;
572+
// Double-check: maybe someone fixed it between our first failure and now.
573+
if let Ok(res) = archive_index::find_in_file(&local_index_path, path_in_archive).await {
574+
return Ok(res);
580575
}
581576

582-
// Someone else is already downloading/repairing. Don't queue on write(); just wait
583-
// a bit and poll the fast path until it becomes readable or we give up.
584-
const STEP_MS: u64 = 10;
585-
const ATTEMPTS: u64 = 50; // = 500ms total wait
586-
const TOTAL_WAIT_MS: u64 = STEP_MS * ATTEMPTS;
577+
let remote_index_path = format!("{archive_path}.{ARCHIVE_INDEX_FILE_EXTENSION}");
587578

588-
let mut last_err = None;
589-
590-
for _ in 0..ATTEMPTS {
591-
sleep(Duration::from_millis(STEP_MS)).await;
579+
// We are the repairer: download fresh index into place.
580+
self.download_archive_index(&local_index_path, &remote_index_path)
581+
.await?;
592582

593-
match archive_index::find_in_file(local_index_path.clone(), path_in_archive).await {
594-
Ok(res) => return Ok(res),
595-
Err(err) => {
596-
// keep waiting; repair may still be in progress
597-
last_err = Some(err);
598-
}
599-
}
600-
}
583+
// Release the write lock explicitly before the final lookup, so others can proceed.
584+
drop(write_guard);
601585

602-
// Still not usable after waiting: return the last error we saw.
603-
Err(last_err
604-
.unwrap_or_else(|| anyhow!("archive index unavailable after repair wait"))
605-
.context(format!(
606-
"no archive index after waiting for {TOTAL_WAIT_MS}ms"
607-
)))
586+
// Final attempt: if this still fails, bubble the error.
587+
return archive_index::find_in_file(local_index_path, path_in_archive).await;
608588
}
609589

610590
#[instrument]

0 commit comments

Comments
 (0)