Compare commits

...

2 Commits

Author SHA1 Message Date
Evan Lohn
f5e4a2f653 timestamp format fix 2025-03-20 16:33:38 -07:00
Weves
4b33c68988 Reduce drive retries 2025-03-20 15:41:53 -07:00
3 changed files with 14 additions and 8 deletions

View File

@@ -1097,7 +1097,9 @@ class GoogleDriveConnector(SlimConnector, CheckpointConnector[GoogleDriveCheckpo
drive_service.files().list(pageSize=1, fields="files(id)").execute()
if isinstance(self._creds, ServiceAccountCredentials):
retry_builder()(get_root_folder_id)(drive_service)
# The default is ~17 minutes of retries; avoid that here since this is called from
# the UI.
retry_builder(tries=3, delay=0.1)(get_root_folder_id)(drive_service)
except HttpError as e:
status_code = e.resp.status if e.resp else None

View File

@@ -1,6 +1,7 @@
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from googleapiclient.discovery import Resource # type: ignore
@@ -36,12 +37,12 @@ def _generate_time_range_filter(
) -> str:
time_range_filter = ""
if start is not None:
time_start = datetime.utcfromtimestamp(start).isoformat() + "Z"
time_start = datetime.fromtimestamp(start, tz=timezone.utc).isoformat()
time_range_filter += (
f" and {GoogleFields.MODIFIED_TIME.value} >= '{time_start}'"
)
if end is not None:
time_stop = datetime.utcfromtimestamp(end).isoformat() + "Z"
time_stop = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()
time_range_filter += f" and {GoogleFields.MODIFIED_TIME.value} <= '{time_stop}'"
return time_range_filter

View File

@@ -17,9 +17,12 @@ logger = setup_logger()
# Google Drive APIs are quite flaky and may return 500s for an
# extended period of time. Trying to combat here by adding a very
# long retry period (~20 minutes of trying every minute)
add_retries = retry_builder(tries=50, max_delay=30)
# extended period of time. This is now addressed by checkpointing.
#
# NOTE: We previously tried to combat this here by adding a very
# long retry period (~20 minutes of trying, one request per minute).
# This is no longer necessary due to checkpointing.
add_retries = retry_builder(tries=5, max_delay=10)
NEXT_PAGE_TOKEN_KEY = "nextPageToken"
PAGE_TOKEN_KEY = "pageToken"
@@ -37,14 +40,14 @@ class GoogleFields(str, Enum):
def _execute_with_retry(request: Any) -> Any:
max_attempts = 10
max_attempts = 6
attempt = 1
while attempt < max_attempts:
# Note: for reasons unknown, the Google API will sometimes return a 429
# and even after waiting the retry period, it will return another 429.
# It could be due to a few possibilities:
# 1. Other things are also requesting from the Gmail API with the same key
# 1. Other things are also requesting from the Drive/Gmail API with the same key
# 2. It's a rolling rate limit so the moment we get some amount of requests cleared, we hit it again very quickly
# 3. The retry-after has a maximum and we've already hit the limit for the day
# or it's something else...