Compare commits

...

1 Commit

Author SHA1 Message Date
Weves
18f13bd2ed Fix pre-commit to exclude .venv directory from lazy import check 2025-09-25 12:59:49 -07:00
3 changed files with 17 additions and 1 deletion

View File

@@ -43,7 +43,7 @@ repos:
name: Check lazy imports are not directly imported
entry: python3 backend/scripts/check_lazy_imports.py
language: system
- files: ^backend/.*\.py$
+ files: ^backend/(?!\.venv/).*\.py$
pass_filenames: false
# We would like to have a mypy pre-commit hook, but due to the fact that

View File

@@ -57,6 +57,8 @@ from onyx.connectors.sharepoint.connector_utils import get_sharepoint_external_a
from onyx.file_processing.extract_file_text import ACCEPTED_IMAGE_FILE_EXTENSIONS
from onyx.file_processing.extract_file_text import extract_text_and_images
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.extract_file_text import is_accepted_file_ext
from onyx.file_processing.extract_file_text import OnyxExtensionType
from onyx.file_processing.file_validation import EXCLUDED_IMAGE_TYPES
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.utils.b64 import get_image_type_from_bytes
@@ -1441,6 +1443,12 @@ class SharepointConnector(
)
for driveitem in driveitems:
driveitem_extension = get_file_ext(driveitem.name)
if not is_accepted_file_ext(driveitem_extension, OnyxExtensionType.All):
logger.warning(
f"Skipping {driveitem.web_url} as it is not a supported file type"
)
continue
# Only yield empty documents if they are PDFs or images
should_yield_if_empty = (
driveitem_extension in ACCEPTED_IMAGE_FILE_EXTENSIONS
@@ -1464,6 +1472,10 @@ class SharepointConnector(
TextSection(link=driveitem.web_url, text="")
]
yield doc
else:
logger.warning(
f"Skipping {driveitem.web_url} as it is empty and not a PDF or image"
)
except Exception as e:
logger.warning(
f"Failed to process driveitem {driveitem.web_url}: {e}"

View File

@@ -104,6 +104,10 @@ def find_python_files(
if ignore_directories is None:
ignore_directories = set()
# Always ignore virtual environment directories
venv_dirs = {".venv", "venv", ".env", "env", "__pycache__"}
ignore_directories = ignore_directories.union(venv_dirs)
python_files = []
for file_path in backend_dir.glob("**/*.py"):
# Skip test files (they can contain test imports)