1
0
forked from github/onyx

don't yield expected auth errors (#4494)

* don't yield expected auth errors

* only catch 403s
This commit is contained in:
evan-danswer
2025-04-09 18:53:02 -07:00
committed by GitHub
parent b5be1fb948
commit e79134eaa0

View File

@@ -4,6 +4,7 @@ from datetime import datetime
from datetime import timezone
from googleapiclient.discovery import Resource # type: ignore
from googleapiclient.errors import HttpError # type: ignore
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
from onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
@@ -131,17 +132,26 @@ def crawl_folders_for_files(
completion_stage=DriveRetrievalStage.FOLDER_FILES,
)
# Only mark a folder as done if it was fully traversed without errors
# This usually indicates that the owner of the folder was impersonated.
# In cases where this never happens, most likely the folder owner is
# not part of the google workspace in question (or for oauth, the authenticated
# user doesn't own the folder)
if found_files:
update_traversed_ids_func(parent_id)
except Exception as e:
logger.error(f"Error getting files in parent {parent_id}: {e}")
yield RetrievedDriveFile(
drive_file=file,
user_email=user_email,
parent_id=parent_id,
completion_stage=DriveRetrievalStage.FOLDER_FILES,
error=e,
)
if isinstance(e, HttpError) and e.status_code == 403:
# don't yield an error here because this is expected behavior
# when a user doesn't have access to a folder
logger.debug(f"Error getting files in parent {parent_id}: {e}")
else:
logger.error(f"Error getting files in parent {parent_id}: {e}")
yield RetrievedDriveFile(
drive_file=file,
user_email=user_email,
parent_id=parent_id,
completion_stage=DriveRetrievalStage.FOLDER_FILES,
error=e,
)
else:
logger.info(f"Skipping subfolder files since already traversed: {parent_id}")