mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-24 03:05:48 +00:00
Compare commits
27 Commits
ci_script
...
v0.20.0-cl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
43404ed0e6 | ||
|
|
4fcfeb1b16 | ||
|
|
fcf99ed11b | ||
|
|
83995dac53 | ||
|
|
bcb33ebf74 | ||
|
|
1f8edbc94c | ||
|
|
1005d3f3c5 | ||
|
|
d2947a573e | ||
|
|
53ab977ffa | ||
|
|
33f08ff489 | ||
|
|
2c0ef9dfcc | ||
|
|
d7152b540a | ||
|
|
cac503bf11 | ||
|
|
c5fdd201e1 | ||
|
|
da1f128763 | ||
|
|
357d3d1963 | ||
|
|
4f6d115c1e | ||
|
|
11e4e4f65a | ||
|
|
18d34401f0 | ||
|
|
49ed7bf5b4 | ||
|
|
a473f5b0d5 | ||
|
|
601a42575d | ||
|
|
2e462af29b | ||
|
|
4d0b926b66 | ||
|
|
504b944477 | ||
|
|
39f137ac10 | ||
|
|
00071fc399 |
@@ -0,0 +1,27 @@
|
||||
"""Add composite index for last_modified and last_synced to document
|
||||
|
||||
Revision ID: f13db29f3101
|
||||
Revises: b388730a2899
|
||||
Create Date: 2025-02-18 22:48:11.511389
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "f13db29f3101"
|
||||
down_revision = "acaab4ef4507"
|
||||
branch_labels: str | None = None
|
||||
depends_on: str | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_index(
|
||||
"ix_document_sync_status",
|
||||
"document",
|
||||
["last_modified", "last_synced"],
|
||||
unique=False,
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("ix_document_sync_status", table_name="document")
|
||||
@@ -42,8 +42,10 @@ from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.configs.constants import OnyxRedisSignals
|
||||
from onyx.connectors.factory import validate_ccpair_for_user
|
||||
from onyx.db.connector import mark_cc_pair_as_permissions_synced
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.connector_credential_pair import update_connector_credential_pair
|
||||
from onyx.db.document import upsert_document_by_connector_credential_pair
|
||||
from onyx.db.engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import AccessType
|
||||
@@ -193,12 +195,19 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
|
||||
monitor_ccpair_permissions_taskset(
|
||||
tenant_id, key_bytes, r, db_session
|
||||
)
|
||||
task_logger.info(f"check_for_doc_permissions_sync finished: tenant={tenant_id}")
|
||||
except SoftTimeLimitExceeded:
|
||||
task_logger.info(
|
||||
"Soft time limit exceeded, task is being terminated gracefully."
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id}"
|
||||
)
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
@@ -282,13 +291,19 @@ def try_creating_permissions_sync_task(
|
||||
redis_connector.permissions.set_fence(payload)
|
||||
|
||||
payload_id = payload.id
|
||||
except Exception:
|
||||
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair_id}")
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected try_creating_permissions_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
|
||||
)
|
||||
return None
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"try_creating_permissions_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
|
||||
)
|
||||
return payload_id
|
||||
|
||||
|
||||
@@ -388,6 +403,31 @@ def connector_permission_sync_generator_task(
|
||||
f"No connector credential pair found for id: {cc_pair_id}"
|
||||
)
|
||||
|
||||
try:
|
||||
created = validate_ccpair_for_user(
|
||||
cc_pair.connector.id,
|
||||
cc_pair.credential.id,
|
||||
db_session,
|
||||
None,
|
||||
tenant_id,
|
||||
enforce_creation=False,
|
||||
)
|
||||
if not created:
|
||||
task_logger.warning(
|
||||
f"Unable to create connector credential pair for id: {cc_pair_id}"
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
|
||||
)
|
||||
update_connector_credential_pair(
|
||||
db_session=db_session,
|
||||
connector_id=cc_pair.connector.id,
|
||||
credential_id=cc_pair.credential.id,
|
||||
status=ConnectorCredentialPairStatus.INVALID,
|
||||
)
|
||||
raise
|
||||
|
||||
source_type = cc_pair.connector.source
|
||||
|
||||
doc_sync_func = DOC_PERMISSIONS_FUNC_MAP.get(source_type)
|
||||
@@ -439,6 +479,10 @@ def connector_permission_sync_generator_task(
|
||||
redis_connector.permissions.generator_complete = tasks_generated
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id}"
|
||||
)
|
||||
@@ -512,13 +556,20 @@ def update_external_document_permissions_task(
|
||||
f"elapsed={elapsed:.2f}"
|
||||
)
|
||||
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Exception in update_external_document_permissions_task: connector_id={connector_id} doc_id={doc_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Exception in update_external_document_permissions_task: "
|
||||
f"connector_id={connector_id} doc_id={doc_id}"
|
||||
)
|
||||
return False
|
||||
|
||||
task_logger.info(
|
||||
f"update_external_document_permissions_task finished: connector_id={connector_id} doc_id={doc_id}"
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
|
||||
@@ -195,12 +195,17 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool
|
||||
task_logger.info(
|
||||
"Soft time limit exceeded, task is being terminated gracefully."
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected check_for_external_group_sync exception: tenant={tenant_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
|
||||
task_logger.info(f"check_for_external_group_sync finished: tenant={tenant_id}")
|
||||
return True
|
||||
|
||||
|
||||
@@ -267,12 +272,19 @@ def try_creating_external_group_sync_task(
|
||||
redis_connector.external_group_sync.set_fence(payload)
|
||||
|
||||
payload_id = payload.id
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected try_creating_external_group_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
task_logger.info(
|
||||
f"try_creating_external_group_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
|
||||
)
|
||||
return payload_id
|
||||
|
||||
|
||||
@@ -361,6 +373,7 @@ def connector_external_group_sync_generator_task(
|
||||
cc_pair = get_connector_credential_pair_from_id(
|
||||
db_session=db_session,
|
||||
cc_pair_id=cc_pair_id,
|
||||
eager_load_credential=True,
|
||||
)
|
||||
if cc_pair is None:
|
||||
raise ValueError(
|
||||
@@ -405,6 +418,14 @@ def connector_external_group_sync_generator_task(
|
||||
sync_status=SyncStatus.SUCCESS,
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
|
||||
)
|
||||
|
||||
msg = f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
|
||||
task_logger.exception(msg)
|
||||
emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id)
|
||||
|
||||
@@ -194,12 +194,14 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
|
||||
task_logger.info(
|
||||
"Soft time limit exceeded, task is being terminated gracefully."
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(f"Unexpected pruning check exception: {error_msg}")
|
||||
task_logger.exception("Unexpected exception during pruning check")
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
|
||||
task_logger.info(f"check_for_pruning finished: tenant={tenant_id}")
|
||||
return True
|
||||
|
||||
|
||||
@@ -301,13 +303,19 @@ def try_creating_prune_generator_task(
|
||||
redis_connector.prune.set_fence(payload)
|
||||
|
||||
payload_id = payload.id
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected try_creating_prune_generator_task exception: cc_pair={cc_pair.id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair.id}")
|
||||
return None
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"try_creating_prune_generator_task finished: cc_pair={cc_pair.id} payload_id={payload_id}"
|
||||
)
|
||||
return payload_id
|
||||
|
||||
|
||||
|
||||
@@ -192,6 +192,8 @@ def document_by_cc_pair_cleanup_task(
|
||||
)
|
||||
return False
|
||||
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(f"Unexpected exception: doc={document_id} {error_msg}")
|
||||
task_logger.exception(f"Unexpected exception: doc={document_id}")
|
||||
|
||||
if self.request.retries < DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES:
|
||||
@@ -219,6 +221,7 @@ def document_by_cc_pair_cleanup_task(
|
||||
mark_document_as_modified(document_id, db_session)
|
||||
return False
|
||||
|
||||
task_logger.info(f"document_by_cc_pair_cleanup_task finished: doc={document_id}")
|
||||
return True
|
||||
|
||||
|
||||
|
||||
@@ -7,15 +7,22 @@ from typing import Optional
|
||||
|
||||
import boto3 # type: ignore
|
||||
from botocore.client import Config # type: ignore
|
||||
from botocore.exceptions import ClientError
|
||||
from botocore.exceptions import NoCredentialsError
|
||||
from botocore.exceptions import PartialCredentialsError
|
||||
from mypy_boto3_s3 import S3Client # type: ignore
|
||||
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import BlobType
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import Section
|
||||
@@ -240,6 +247,73 @@ class BlobStorageConnector(LoadConnector, PollConnector):
|
||||
|
||||
return None
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
if self.s3_client is None:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"Blob storage credentials not loaded."
|
||||
)
|
||||
|
||||
if not self.bucket_name:
|
||||
raise ConnectorValidationError(
|
||||
"No bucket name was provided in connector settings."
|
||||
)
|
||||
|
||||
try:
|
||||
# We only fetch one object/page as a light-weight validation step.
|
||||
# This ensures we trigger typical S3 permission checks (ListObjectsV2, etc.).
|
||||
self.s3_client.list_objects_v2(
|
||||
Bucket=self.bucket_name, Prefix=self.prefix, MaxKeys=1
|
||||
)
|
||||
|
||||
except NoCredentialsError:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"No valid blob storage credentials found or provided to boto3."
|
||||
)
|
||||
except PartialCredentialsError:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"Partial or incomplete blob storage credentials provided to boto3."
|
||||
)
|
||||
except ClientError as e:
|
||||
error_code = e.response["Error"].get("Code", "")
|
||||
status_code = e.response["ResponseMetadata"].get("HTTPStatusCode")
|
||||
|
||||
# Most common S3 error cases
|
||||
if error_code in [
|
||||
"AccessDenied",
|
||||
"InvalidAccessKeyId",
|
||||
"SignatureDoesNotMatch",
|
||||
]:
|
||||
if status_code == 403 or error_code == "AccessDenied":
|
||||
raise InsufficientPermissionsError(
|
||||
f"Insufficient permissions to list objects in bucket '{self.bucket_name}'. "
|
||||
"Please check your bucket policy and/or IAM policy."
|
||||
)
|
||||
if status_code == 401 or error_code == "SignatureDoesNotMatch":
|
||||
raise CredentialExpiredError(
|
||||
"Provided blob storage credentials appear invalid or expired."
|
||||
)
|
||||
|
||||
raise CredentialExpiredError(
|
||||
f"Credential issue encountered ({error_code})."
|
||||
)
|
||||
|
||||
if error_code == "NoSuchBucket" or status_code == 404:
|
||||
raise ConnectorValidationError(
|
||||
f"Bucket '{self.bucket_name}' does not exist or cannot be found."
|
||||
)
|
||||
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected S3 client error (code={error_code}, status={status_code}): {e}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Catch-all for anything not captured by the above
|
||||
# Since we are unsure of the error and it may not disable the connector,
|
||||
# raise an unexpected error (does not disable connector)
|
||||
raise UnexpectedError(
|
||||
f"Unexpected error during blob storage settings validation: {e}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
credentials_dict = {
|
||||
|
||||
@@ -4,6 +4,8 @@ from datetime import timezone
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
|
||||
from onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET
|
||||
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
|
||||
@@ -16,12 +18,16 @@ from onyx.connectors.confluence.utils import build_confluence_document_id
|
||||
from onyx.connectors.confluence.utils import datetime_from_string
|
||||
from onyx.connectors.confluence.utils import extract_text_from_confluence_html
|
||||
from onyx.connectors.confluence.utils import validate_attachment_filetype
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
@@ -397,3 +403,34 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
callback.progress("retrieve_all_slim_documents", 1)
|
||||
|
||||
yield doc_metadata_list
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
if self._confluence_client is None:
|
||||
raise ConnectorMissingCredentialError("Confluence credentials not loaded.")
|
||||
|
||||
try:
|
||||
spaces = self._confluence_client.get_all_spaces(limit=1)
|
||||
except HTTPError as e:
|
||||
status_code = e.response.status_code if e.response else None
|
||||
if status_code == 401:
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Confluence credentials (HTTP 401)."
|
||||
)
|
||||
elif status_code == 403:
|
||||
raise InsufficientPermissionsError(
|
||||
"Insufficient permissions to access Confluence resources (HTTP 403)."
|
||||
)
|
||||
else:
|
||||
raise UnexpectedError(
|
||||
f"Unexpected Confluence error (status={status_code}): {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
raise UnexpectedError(
|
||||
f"Unexpected error while validating Confluence settings: {e}"
|
||||
)
|
||||
|
||||
if not spaces or not spaces.get("results"):
|
||||
raise ConnectorValidationError(
|
||||
"No Confluence spaces found. Either your credentials lack permissions, or "
|
||||
"there truly are no spaces in this Confluence instance."
|
||||
)
|
||||
|
||||
@@ -186,7 +186,8 @@ def validate_ccpair_for_user(
|
||||
db_session: Session,
|
||||
user: User | None,
|
||||
tenant_id: str | None,
|
||||
) -> None:
|
||||
enforce_creation: bool = True,
|
||||
) -> bool:
|
||||
# Validate the connector settings
|
||||
connector = fetch_connector_by_id(connector_id, db_session)
|
||||
credential = fetch_credential_by_id_for_user(
|
||||
@@ -200,7 +201,7 @@ def validate_ccpair_for_user(
|
||||
raise ValueError("Connector not found")
|
||||
|
||||
if connector.source == DocumentSource.INGESTION_API:
|
||||
return
|
||||
return False
|
||||
|
||||
if not credential:
|
||||
raise ValueError("Credential not found")
|
||||
@@ -215,6 +216,10 @@ def validate_ccpair_for_user(
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
except Exception as e:
|
||||
raise ConnectorValidationError(str(e))
|
||||
if enforce_creation:
|
||||
raise ConnectorValidationError(str(e))
|
||||
else:
|
||||
return False
|
||||
|
||||
runnable_connector.validate_connector_settings()
|
||||
return True
|
||||
|
||||
@@ -36,12 +36,16 @@ from onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTION
|
||||
from onyx.connectors.google_utils.shared_constants import SCOPE_DOC_URL
|
||||
from onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
|
||||
from onyx.connectors.google_utils.shared_constants import USER_FIELDS
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.retry_wrapper import retry_builder
|
||||
@@ -137,7 +141,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
"Please visit the docs for help with the new setup: "
|
||||
f"{SCOPE_DOC_URL}"
|
||||
)
|
||||
raise ValueError(
|
||||
raise ConnectorValidationError(
|
||||
"Google Drive connector received old input parameters. "
|
||||
"Please visit the docs for help with the new setup: "
|
||||
f"{SCOPE_DOC_URL}"
|
||||
@@ -151,7 +155,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
and not my_drive_emails
|
||||
and not shared_drive_urls
|
||||
):
|
||||
raise ValueError(
|
||||
raise ConnectorValidationError(
|
||||
"Nothing to index. Please specify at least one of the following: "
|
||||
"include_shared_drives, include_my_drives, include_files_shared_with_me, "
|
||||
"shared_folder_urls, or my_drive_emails"
|
||||
@@ -220,7 +224,15 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
return self._creds
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:
|
||||
self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
|
||||
try:
|
||||
self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
|
||||
except KeyError:
|
||||
logger.debug(f"Credentials: {credentials}") # TODO: remove
|
||||
raise ValueError(
|
||||
"Primary admin email missing, "
|
||||
"should not call this property "
|
||||
"before calling load_credentials"
|
||||
)
|
||||
|
||||
self._creds, new_creds_dict = get_google_creds(
|
||||
credentials=credentials,
|
||||
@@ -602,3 +614,65 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
if MISSING_SCOPES_ERROR_STR in str(e):
|
||||
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
|
||||
raise e
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""
|
||||
Validate that we can connect to Google Drive (and optionally Admin APIs) with the provided credentials.
|
||||
Attempts a small listing of files to confirm scope and access.
|
||||
|
||||
Raises:
|
||||
ConnectorMissingCredentialError: If no credentials are loaded.
|
||||
CredentialExpiredError: If credentials are invalid/expired (HTTP 401).
|
||||
InsufficientPermissionsError: If we lack the Drive scope or are otherwise denied (HTTP 403).
|
||||
ConnectorValidationError: Any other unexpected errors (e.g. missing domain, no files).
|
||||
"""
|
||||
if self._creds is None:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"Google Drive credentials not loaded."
|
||||
)
|
||||
|
||||
if self._primary_admin_email is None:
|
||||
raise ConnectorValidationError(
|
||||
"Primary admin email not found in credentials. "
|
||||
"Ensure DB_CREDENTIALS_PRIMARY_ADMIN_KEY is set."
|
||||
)
|
||||
|
||||
try:
|
||||
# Try a minimal file listing to confirm we have scope and valid credentials
|
||||
drive_service = get_drive_service(self._creds, self._primary_admin_email)
|
||||
response = (
|
||||
drive_service.files().list(pageSize=1, fields="files(id)").execute()
|
||||
)
|
||||
|
||||
# If listing returns no files, that's OK for validation
|
||||
# but we've at least confirmed we have the necessary scopes and can connect.
|
||||
# If you *require* at least 1 file in Drive, you could handle that here.
|
||||
_ = response.get("files", [])
|
||||
|
||||
except HttpError as e:
|
||||
status_code = e.resp.status if e.resp else None
|
||||
if status_code == 401:
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Google Drive credentials (401)."
|
||||
)
|
||||
elif status_code == 403:
|
||||
# Could mean missing scopes or the account lacks permission
|
||||
raise InsufficientPermissionsError(
|
||||
"Google Drive app lacks required permissions (403). "
|
||||
"Please ensure the necessary scopes are granted."
|
||||
)
|
||||
else:
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected Google Drive error (status={status_code}): {e}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Check for scope-related hints from the error message
|
||||
if MISSING_SCOPES_ERROR_STR in str(e):
|
||||
raise InsufficientPermissionsError(
|
||||
"Google Drive credentials are missing required scopes. "
|
||||
f"{ONYX_SCOPE_INSTRUCTIONS}"
|
||||
)
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected error during Google Drive validation: {e}"
|
||||
)
|
||||
|
||||
@@ -87,16 +87,18 @@ class HubSpotConnector(LoadConnector, PollConnector):
|
||||
contact = api_client.crm.contacts.basic_api.get_by_id(
|
||||
contact_id=contact.id
|
||||
)
|
||||
associated_emails.append(contact.properties["email"])
|
||||
email = contact.properties.get("email")
|
||||
if email is not None:
|
||||
associated_emails.append(email)
|
||||
|
||||
if notes:
|
||||
for note in notes.results:
|
||||
note = api_client.crm.objects.notes.basic_api.get_by_id(
|
||||
note_id=note.id, properties=["content", "hs_body_preview"]
|
||||
)
|
||||
if note.properties["hs_body_preview"] is None:
|
||||
continue
|
||||
associated_notes.append(note.properties["hs_body_preview"])
|
||||
preview = note.properties.get("hs_body_preview")
|
||||
if preview is not None:
|
||||
associated_notes.append(preview)
|
||||
|
||||
associated_emails_str = " ,".join(associated_emails)
|
||||
associated_notes_str = " ".join(associated_notes)
|
||||
|
||||
@@ -20,9 +20,13 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.interfaces import CheckpointConnector
|
||||
from onyx.connectors.interfaces import CheckpointOutput
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import ConnectorCheckpoint
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
@@ -666,6 +670,56 @@ class SlackConnector(SlimConnector, CheckpointConnector):
|
||||
)
|
||||
return checkpoint
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
if self.client is None:
|
||||
raise ConnectorMissingCredentialError("Slack credentials not loaded.")
|
||||
|
||||
try:
|
||||
# Minimal API call to confirm we can list channels
|
||||
# We set limit=1 for a lightweight check
|
||||
response = self.client.conversations_list(limit=1, types=["public_channel"])
|
||||
# Just ensure Slack responded "ok: True"
|
||||
if not response.get("ok", False):
|
||||
error_msg = response.get("error", "Unknown error from Slack")
|
||||
if error_msg == "invalid_auth":
|
||||
raise ConnectorValidationError(
|
||||
f"Invalid or expired Slack bot token ({error_msg})."
|
||||
)
|
||||
elif error_msg == "not_authed":
|
||||
raise CredentialExpiredError(
|
||||
f"Invalid or expired Slack bot token ({error_msg})."
|
||||
)
|
||||
raise UnexpectedError(f"Slack API returned a failure: {error_msg}")
|
||||
|
||||
except SlackApiError as e:
|
||||
slack_error = e.response.get("error", "")
|
||||
if slack_error == "missing_scope":
|
||||
# The needed scope is typically "channels:read" or "groups:read"
|
||||
# for viewing channels. The error message might also contain the
|
||||
# specific scope needed vs. provided.
|
||||
raise InsufficientPermissionsError(
|
||||
"Slack bot token lacks the necessary scope to list channels. "
|
||||
"Please ensure your Slack app has 'channels:read' (or 'groups:read' for private channels) enabled."
|
||||
)
|
||||
elif slack_error == "invalid_auth":
|
||||
raise CredentialExpiredError(
|
||||
f"Invalid or expired Slack bot token ({slack_error})."
|
||||
)
|
||||
elif slack_error == "not_authed":
|
||||
raise CredentialExpiredError(
|
||||
f"Invalid or expired Slack bot token ({slack_error})."
|
||||
)
|
||||
else:
|
||||
# Generic Slack error
|
||||
raise UnexpectedError(
|
||||
f"Unexpected Slack error '{slack_error}' during settings validation."
|
||||
)
|
||||
except Exception as e:
|
||||
# Catch-all for unexpected exceptions
|
||||
raise UnexpectedError(
|
||||
f"Unexpected error during Slack settings validation: {e}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os
|
||||
|
||||
@@ -5,6 +5,7 @@ from typing import Any
|
||||
|
||||
import msal # type: ignore
|
||||
from office365.graph_client import GraphClient # type: ignore
|
||||
from office365.runtime.client_request_exception import ClientRequestException # type: ignore
|
||||
from office365.teams.channels.channel import Channel # type: ignore
|
||||
from office365.teams.chats.messages.message import ChatMessage # type: ignore
|
||||
from office365.teams.team import Team # type: ignore
|
||||
@@ -12,10 +13,14 @@ from office365.teams.team import Team # type: ignore
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
@@ -279,6 +284,64 @@ class TeamsConnector(LoadConnector, PollConnector):
|
||||
end_datetime = datetime.fromtimestamp(end, timezone.utc)
|
||||
return self._fetch_from_teams(start=start_datetime, end=end_datetime)
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""
|
||||
Validate that we can connect to Microsoft Teams with the provided MSAL/Graph credentials
|
||||
and that we can see at least one Team. If the user has specified a list of Teams by name,
|
||||
confirm at least one of them is found.
|
||||
|
||||
Raises:
|
||||
ConnectorMissingCredentialError: If the Graph client is not yet set (missing credentials).
|
||||
CredentialExpiredError: If credentials appear invalid/expired (e.g. 401 Unauthorized).
|
||||
InsufficientPermissionsError: If the app lacks required permissions to read Teams.
|
||||
ConnectorValidationError: If no Teams are found, or if requested Teams are not found.
|
||||
"""
|
||||
if self.graph_client is None:
|
||||
raise ConnectorMissingCredentialError("Teams credentials not loaded.")
|
||||
|
||||
try:
|
||||
# Minimal call to confirm we can retrieve Teams
|
||||
found_teams = self._get_all_teams()
|
||||
|
||||
# You may optionally catch the Graph/Office365 request exception if available:
|
||||
except ClientRequestException as e:
|
||||
status_code = e.response.status_code
|
||||
if status_code == 401:
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Microsoft Teams credentials (401 Unauthorized)."
|
||||
)
|
||||
elif status_code == 403:
|
||||
raise InsufficientPermissionsError(
|
||||
"Your app lacks sufficient permissions to read Teams (403 Forbidden)."
|
||||
)
|
||||
else:
|
||||
raise UnexpectedError(f"Unexpected error retrieving teams: {e}")
|
||||
|
||||
except Exception as e:
|
||||
error_str = str(e).lower()
|
||||
if (
|
||||
"unauthorized" in error_str
|
||||
or "401" in error_str
|
||||
or "invalid_grant" in error_str
|
||||
):
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Microsoft Teams credentials."
|
||||
)
|
||||
elif "forbidden" in error_str or "403" in error_str:
|
||||
raise InsufficientPermissionsError(
|
||||
"App lacks required permissions to read from Microsoft Teams."
|
||||
)
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected error during Teams validation: {e}"
|
||||
)
|
||||
|
||||
# If we get this far, the Graph call succeeded. Check for presence of Teams:
|
||||
if not found_teams:
|
||||
raise ConnectorValidationError(
|
||||
"No Teams found for the given credentials. "
|
||||
"Either there are no Teams in this tenant, or your app does not have permission to view them."
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
connector = TeamsConnector(teams=os.environ["TEAMS"].split(","))
|
||||
|
||||
@@ -440,7 +440,10 @@ class WebConnector(LoadConnector):
|
||||
"No URL configured. Please provide at least one valid URL."
|
||||
)
|
||||
|
||||
if self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value:
|
||||
if (
|
||||
self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value
|
||||
or self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value
|
||||
):
|
||||
return None
|
||||
|
||||
# We'll just test the first URL for connectivity and correctness
|
||||
|
||||
@@ -194,9 +194,14 @@ def get_connector_credential_pair_from_id_for_user(
|
||||
def get_connector_credential_pair_from_id(
|
||||
db_session: Session,
|
||||
cc_pair_id: int,
|
||||
eager_load_credential: bool = False,
|
||||
) -> ConnectorCredentialPair | None:
|
||||
stmt = select(ConnectorCredentialPair).distinct()
|
||||
stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)
|
||||
|
||||
if eager_load_credential:
|
||||
stmt = stmt.options(joinedload(ConnectorCredentialPair.credential))
|
||||
|
||||
result = db_session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
|
||||
@@ -60,9 +60,8 @@ def count_documents_by_needs_sync(session: Session) -> int:
|
||||
This function executes the query and returns the count of
|
||||
documents matching the criteria."""
|
||||
|
||||
count = (
|
||||
session.query(func.count(DbDocument.id.distinct()))
|
||||
.select_from(DbDocument)
|
||||
return (
|
||||
session.query(DbDocument.id)
|
||||
.join(
|
||||
DocumentByConnectorCredentialPair,
|
||||
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||
@@ -73,63 +72,53 @@ def count_documents_by_needs_sync(session: Session) -> int:
|
||||
DbDocument.last_synced.is_(None),
|
||||
)
|
||||
)
|
||||
.scalar()
|
||||
.count()
|
||||
)
|
||||
|
||||
return count
|
||||
|
||||
|
||||
def construct_document_select_for_connector_credential_pair_by_needs_sync(
|
||||
connector_id: int, credential_id: int
|
||||
) -> Select:
|
||||
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
)
|
||||
)
|
||||
|
||||
stmt = (
|
||||
return (
|
||||
select(DbDocument)
|
||||
.where(
|
||||
DbDocument.id.in_(initial_doc_ids_stmt),
|
||||
or_(
|
||||
DbDocument.last_modified
|
||||
> DbDocument.last_synced, # last_modified is newer than last_synced
|
||||
DbDocument.last_synced.is_(None), # never synced
|
||||
),
|
||||
.join(
|
||||
DocumentByConnectorCredentialPair,
|
||||
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||
)
|
||||
.where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
or_(
|
||||
DbDocument.last_modified > DbDocument.last_synced,
|
||||
DbDocument.last_synced.is_(None),
|
||||
),
|
||||
)
|
||||
)
|
||||
.distinct()
|
||||
)
|
||||
|
||||
return stmt
|
||||
|
||||
|
||||
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
|
||||
connector_id: int, credential_id: int
|
||||
) -> Select:
|
||||
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
)
|
||||
)
|
||||
|
||||
stmt = (
|
||||
return (
|
||||
select(DbDocument.id)
|
||||
.where(
|
||||
DbDocument.id.in_(initial_doc_ids_stmt),
|
||||
or_(
|
||||
DbDocument.last_modified
|
||||
> DbDocument.last_synced, # last_modified is newer than last_synced
|
||||
DbDocument.last_synced.is_(None), # never synced
|
||||
),
|
||||
.join(
|
||||
DocumentByConnectorCredentialPair,
|
||||
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||
)
|
||||
.where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
or_(
|
||||
DbDocument.last_modified > DbDocument.last_synced,
|
||||
DbDocument.last_synced.is_(None),
|
||||
),
|
||||
)
|
||||
)
|
||||
.distinct()
|
||||
)
|
||||
|
||||
return stmt
|
||||
|
||||
|
||||
def get_all_documents_needing_vespa_sync_for_cc_pair(
|
||||
db_session: Session, cc_pair_id: int
|
||||
|
||||
@@ -570,6 +570,14 @@ class Document(Base):
|
||||
back_populates="documents",
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index(
|
||||
"ix_document_sync_status",
|
||||
last_modified,
|
||||
last_synced,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class Tag(Base):
|
||||
__tablename__ = "tag"
|
||||
|
||||
Reference in New Issue
Block a user