mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-25 11:45:47 +00:00
Compare commits
18 Commits
ci_python_
...
v0.20.0-cl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
72fc20025d | ||
|
|
2cce088767 | ||
|
|
e81e047eb6 | ||
|
|
887f92d8e6 | ||
|
|
78d8017691 | ||
|
|
cc450cd1e8 | ||
|
|
9d181542fb | ||
|
|
e0d9822e7c | ||
|
|
43d0a10ad6 | ||
|
|
b4edc35697 | ||
|
|
9dc77620d6 | ||
|
|
33b4f4c9ea | ||
|
|
907fd3db50 | ||
|
|
ed9e710800 | ||
|
|
9099199258 | ||
|
|
fbc2da3ab8 | ||
|
|
8066785f7b | ||
|
|
b026304c72 |
@@ -4,6 +4,7 @@ from sqlalchemy.orm import Session
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.models import Connector
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.models import UserGroup__ConnectorCredentialPair
|
||||
@@ -36,6 +37,7 @@ def get_cc_pairs_by_source(
|
||||
db_session: Session,
|
||||
source_type: DocumentSource,
|
||||
only_sync: bool,
|
||||
only_valid: bool,
|
||||
) -> list[ConnectorCredentialPair]:
|
||||
"""
|
||||
Get all cc_pairs for a given source type (and optionally only sync)
|
||||
@@ -51,6 +53,11 @@ def get_cc_pairs_by_source(
|
||||
if only_sync:
|
||||
query = query.filter(ConnectorCredentialPair.access_type == AccessType.SYNC)
|
||||
|
||||
if only_valid:
|
||||
query = query.filter(
|
||||
ConnectorCredentialPair.status == ConnectorCredentialPairStatus.ACTIVE
|
||||
)
|
||||
|
||||
cc_pairs = query.all()
|
||||
return cc_pairs
|
||||
|
||||
|
||||
@@ -62,12 +62,14 @@ def _fetch_permissions_for_permission_ids(
|
||||
user_email=(owner_email or google_drive_connector.primary_admin_email),
|
||||
)
|
||||
|
||||
# We continue on 404 or 403 because the document may not exist or the user may not have access to it
|
||||
fetched_permissions = execute_paginated_retrieval(
|
||||
retrieval_function=drive_service.permissions().list,
|
||||
list_key="permissions",
|
||||
fileId=doc_id,
|
||||
fields="permissions(id, emailAddress, type, domain)",
|
||||
supportsAllDrives=True,
|
||||
continue_on_404_or_403=True,
|
||||
)
|
||||
|
||||
permissions_for_doc_id = []
|
||||
@@ -104,7 +106,13 @@ def _get_permissions_from_slim_doc(
|
||||
user_emails: set[str] = set()
|
||||
group_emails: set[str] = set()
|
||||
public = False
|
||||
skipped_permissions = 0
|
||||
|
||||
for permission in permissions_list:
|
||||
if not permission:
|
||||
skipped_permissions += 1
|
||||
continue
|
||||
|
||||
permission_type = permission["type"]
|
||||
if permission_type == "user":
|
||||
user_emails.add(permission["emailAddress"])
|
||||
@@ -121,6 +129,9 @@ def _get_permissions_from_slim_doc(
|
||||
elif permission_type == "anyone":
|
||||
public = True
|
||||
|
||||
logger.info(
|
||||
f"Skipped {skipped_permissions} permissions for document {slim_doc.id}– {len(permissions_list)} permissions fetched"
|
||||
)
|
||||
drive_id = permission_info.get("drive_id")
|
||||
group_ids = group_emails | ({drive_id} if drive_id is not None else set())
|
||||
|
||||
|
||||
@@ -42,8 +42,10 @@ from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.configs.constants import OnyxRedisSignals
|
||||
from onyx.connectors.factory import validate_ccpair_for_user
|
||||
from onyx.db.connector import mark_cc_pair_as_permissions_synced
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.connector_credential_pair import update_connector_credential_pair
|
||||
from onyx.db.document import upsert_document_by_connector_credential_pair
|
||||
from onyx.db.engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import AccessType
|
||||
@@ -193,12 +195,19 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
|
||||
monitor_ccpair_permissions_taskset(
|
||||
tenant_id, key_bytes, r, db_session
|
||||
)
|
||||
task_logger.info(f"check_for_doc_permissions_sync finished: tenant={tenant_id}")
|
||||
except SoftTimeLimitExceeded:
|
||||
task_logger.info(
|
||||
"Soft time limit exceeded, task is being terminated gracefully."
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id}"
|
||||
)
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
@@ -282,13 +291,19 @@ def try_creating_permissions_sync_task(
|
||||
redis_connector.permissions.set_fence(payload)
|
||||
|
||||
payload_id = payload.id
|
||||
except Exception:
|
||||
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair_id}")
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected try_creating_permissions_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
|
||||
)
|
||||
return None
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"try_creating_permissions_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
|
||||
)
|
||||
return payload_id
|
||||
|
||||
|
||||
@@ -388,6 +403,30 @@ def connector_permission_sync_generator_task(
|
||||
f"No connector credential pair found for id: {cc_pair_id}"
|
||||
)
|
||||
|
||||
try:
|
||||
created = validate_ccpair_for_user(
|
||||
cc_pair.connector.id,
|
||||
cc_pair.credential.id,
|
||||
db_session,
|
||||
tenant_id,
|
||||
enforce_creation=False,
|
||||
)
|
||||
if not created:
|
||||
task_logger.warning(
|
||||
f"Unable to create connector credential pair for id: {cc_pair_id}"
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
|
||||
)
|
||||
update_connector_credential_pair(
|
||||
db_session=db_session,
|
||||
connector_id=cc_pair.connector.id,
|
||||
credential_id=cc_pair.credential.id,
|
||||
status=ConnectorCredentialPairStatus.INVALID,
|
||||
)
|
||||
raise
|
||||
|
||||
source_type = cc_pair.connector.source
|
||||
|
||||
doc_sync_func = DOC_PERMISSIONS_FUNC_MAP.get(source_type)
|
||||
@@ -439,6 +478,10 @@ def connector_permission_sync_generator_task(
|
||||
redis_connector.permissions.generator_complete = tasks_generated
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id}"
|
||||
)
|
||||
@@ -512,13 +555,20 @@ def update_external_document_permissions_task(
|
||||
f"elapsed={elapsed:.2f}"
|
||||
)
|
||||
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Exception in update_external_document_permissions_task: connector_id={connector_id} doc_id={doc_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Exception in update_external_document_permissions_task: "
|
||||
f"connector_id={connector_id} doc_id={doc_id}"
|
||||
)
|
||||
return False
|
||||
|
||||
task_logger.info(
|
||||
f"update_external_document_permissions_task finished: connector_id={connector_id} doc_id={doc_id}"
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
|
||||
@@ -37,8 +37,10 @@ from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.configs.constants import OnyxRedisSignals
|
||||
from onyx.connectors.factory import validate_ccpair_for_user
|
||||
from onyx.db.connector import mark_cc_pair_as_external_group_synced
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.connector_credential_pair import update_connector_credential_pair
|
||||
from onyx.db.engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
@@ -148,7 +150,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool
|
||||
for source in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC:
|
||||
# These are ordered by cc_pair id so the first one is the one we want
|
||||
cc_pairs_to_dedupe = get_cc_pairs_by_source(
|
||||
db_session, source, only_sync=True
|
||||
db_session, source, only_sync=True, only_valid=True
|
||||
)
|
||||
# We only want to sync one cc_pair per source type
|
||||
# in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC so we dedupe here
|
||||
@@ -195,12 +197,17 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool
|
||||
task_logger.info(
|
||||
"Soft time limit exceeded, task is being terminated gracefully."
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected check_for_external_group_sync exception: tenant={tenant_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
|
||||
task_logger.info(f"check_for_external_group_sync finished: tenant={tenant_id}")
|
||||
return True
|
||||
|
||||
|
||||
@@ -267,12 +274,19 @@ def try_creating_external_group_sync_task(
|
||||
redis_connector.external_group_sync.set_fence(payload)
|
||||
|
||||
payload_id = payload.id
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected try_creating_external_group_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
task_logger.info(
|
||||
f"try_creating_external_group_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
|
||||
)
|
||||
return payload_id
|
||||
|
||||
|
||||
@@ -361,12 +375,37 @@ def connector_external_group_sync_generator_task(
|
||||
cc_pair = get_connector_credential_pair_from_id(
|
||||
db_session=db_session,
|
||||
cc_pair_id=cc_pair_id,
|
||||
eager_load_credential=True,
|
||||
)
|
||||
if cc_pair is None:
|
||||
raise ValueError(
|
||||
f"No connector credential pair found for id: {cc_pair_id}"
|
||||
)
|
||||
|
||||
try:
|
||||
created = validate_ccpair_for_user(
|
||||
cc_pair.connector.id,
|
||||
cc_pair.credential.id,
|
||||
db_session,
|
||||
tenant_id,
|
||||
enforce_creation=False,
|
||||
)
|
||||
if not created:
|
||||
task_logger.warning(
|
||||
f"Unable to create connector credential pair for id: {cc_pair_id}"
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
|
||||
)
|
||||
update_connector_credential_pair(
|
||||
db_session=db_session,
|
||||
connector_id=cc_pair.connector.id,
|
||||
credential_id=cc_pair.credential.id,
|
||||
status=ConnectorCredentialPairStatus.INVALID,
|
||||
)
|
||||
raise
|
||||
|
||||
source_type = cc_pair.connector.source
|
||||
|
||||
ext_group_sync_func = GROUP_PERMISSIONS_FUNC_MAP.get(source_type)
|
||||
@@ -405,6 +444,14 @@ def connector_external_group_sync_generator_task(
|
||||
sync_status=SyncStatus.SUCCESS,
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(
|
||||
f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
|
||||
)
|
||||
|
||||
msg = f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
|
||||
task_logger.exception(msg)
|
||||
emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id)
|
||||
|
||||
@@ -77,6 +77,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
|
||||
from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.configs import SENTRY_DSN
|
||||
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -617,6 +618,12 @@ def connector_indexing_task(
|
||||
This will cause the primary worker to abort the indexing attempt and clean up.
|
||||
"""
|
||||
|
||||
tenant_id_from_context = CURRENT_TENANT_ID_CONTEXTVAR.get()
|
||||
logger.info(f"connector_indexing_task with tenant_id={tenant_id_from_context}")
|
||||
logger.info(
|
||||
f"connector_indexing_task with args={index_attempt_id}, {cc_pair_id}, {search_settings_id}, {is_ee}, {tenant_id}"
|
||||
)
|
||||
|
||||
# Since connector_indexing_proxy_task spawns a new process using this function as
|
||||
# the entrypoint, we init Sentry here.
|
||||
if SENTRY_DSN:
|
||||
@@ -924,6 +931,7 @@ def connector_indexing_proxy_task(
|
||||
task_logger.error("self.request.id is None!")
|
||||
|
||||
client = SimpleJobClient()
|
||||
task_logger.info(f"submitting connector_indexing_task with tenant_id={tenant_id}")
|
||||
|
||||
job = client.submit(
|
||||
connector_indexing_task,
|
||||
@@ -1070,6 +1078,7 @@ def connector_indexing_proxy_task(
|
||||
|
||||
if not index_attempt.is_finished():
|
||||
continue
|
||||
|
||||
except Exception:
|
||||
# if the DB exceptioned, just restart the check.
|
||||
# polling the index attempt status doesn't need to be strongly consistent
|
||||
@@ -1079,6 +1088,7 @@ def connector_indexing_proxy_task(
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
result.status = IndexingWatchdogTerminalStatus.WATCHDOG_EXCEPTIONED
|
||||
if isinstance(e, ConnectorValidationError):
|
||||
|
||||
@@ -194,12 +194,14 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
|
||||
task_logger.info(
|
||||
"Soft time limit exceeded, task is being terminated gracefully."
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(f"Unexpected pruning check exception: {error_msg}")
|
||||
task_logger.exception("Unexpected exception during pruning check")
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
|
||||
task_logger.info(f"check_for_pruning finished: tenant={tenant_id}")
|
||||
return True
|
||||
|
||||
|
||||
@@ -301,13 +303,19 @@ def try_creating_prune_generator_task(
|
||||
redis_connector.prune.set_fence(payload)
|
||||
|
||||
payload_id = payload.id
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(
|
||||
f"Unexpected try_creating_prune_generator_task exception: cc_pair={cc_pair.id} {error_msg}"
|
||||
)
|
||||
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair.id}")
|
||||
return None
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"try_creating_prune_generator_task finished: cc_pair={cc_pair.id} payload_id={payload_id}"
|
||||
)
|
||||
return payload_id
|
||||
|
||||
|
||||
|
||||
@@ -192,6 +192,8 @@ def document_by_cc_pair_cleanup_task(
|
||||
)
|
||||
return False
|
||||
|
||||
error_msg = str(e).replace("\n", " ")
|
||||
task_logger.warning(f"Unexpected exception: doc={document_id} {error_msg}")
|
||||
task_logger.exception(f"Unexpected exception: doc={document_id}")
|
||||
|
||||
if self.request.retries < DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES:
|
||||
@@ -219,6 +221,7 @@ def document_by_cc_pair_cleanup_task(
|
||||
mark_document_as_modified(document_id, db_session)
|
||||
return False
|
||||
|
||||
task_logger.info(f"document_by_cc_pair_cleanup_task finished: doc={document_id}")
|
||||
return True
|
||||
|
||||
|
||||
|
||||
@@ -17,6 +17,9 @@ from typing import Optional
|
||||
from onyx.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME
|
||||
from onyx.db.engine import SqlEngine
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
from shared_configs.configs import TENANT_ID_PREFIX
|
||||
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -54,6 +57,15 @@ def _initializer(
|
||||
kwargs = {}
|
||||
|
||||
logger.info("Initializing spawned worker child process.")
|
||||
# 1. Get tenant_id from args or fallback to default
|
||||
tenant_id = POSTGRES_DEFAULT_SCHEMA
|
||||
for arg in reversed(args):
|
||||
if arg.startswith(TENANT_ID_PREFIX):
|
||||
tenant_id = arg
|
||||
break
|
||||
|
||||
# 2. Set the tenant context before running anything
|
||||
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
|
||||
|
||||
# Reset the engine in the child process
|
||||
SqlEngine.reset_engine()
|
||||
@@ -81,6 +93,8 @@ def _initializer(
|
||||
queue.put(error_msg) # Send the exception to the parent process
|
||||
|
||||
sys.exit(255) # use 255 to indicate a generic exception
|
||||
finally:
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
|
||||
|
||||
|
||||
def _run_in_process(
|
||||
|
||||
@@ -7,15 +7,22 @@ from typing import Optional
|
||||
|
||||
import boto3 # type: ignore
|
||||
from botocore.client import Config # type: ignore
|
||||
from botocore.exceptions import ClientError
|
||||
from botocore.exceptions import NoCredentialsError
|
||||
from botocore.exceptions import PartialCredentialsError
|
||||
from mypy_boto3_s3 import S3Client # type: ignore
|
||||
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import BlobType
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import Section
|
||||
@@ -240,6 +247,73 @@ class BlobStorageConnector(LoadConnector, PollConnector):
|
||||
|
||||
return None
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
if self.s3_client is None:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"Blob storage credentials not loaded."
|
||||
)
|
||||
|
||||
if not self.bucket_name:
|
||||
raise ConnectorValidationError(
|
||||
"No bucket name was provided in connector settings."
|
||||
)
|
||||
|
||||
try:
|
||||
# We only fetch one object/page as a light-weight validation step.
|
||||
# This ensures we trigger typical S3 permission checks (ListObjectsV2, etc.).
|
||||
self.s3_client.list_objects_v2(
|
||||
Bucket=self.bucket_name, Prefix=self.prefix, MaxKeys=1
|
||||
)
|
||||
|
||||
except NoCredentialsError:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"No valid blob storage credentials found or provided to boto3."
|
||||
)
|
||||
except PartialCredentialsError:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"Partial or incomplete blob storage credentials provided to boto3."
|
||||
)
|
||||
except ClientError as e:
|
||||
error_code = e.response["Error"].get("Code", "")
|
||||
status_code = e.response["ResponseMetadata"].get("HTTPStatusCode")
|
||||
|
||||
# Most common S3 error cases
|
||||
if error_code in [
|
||||
"AccessDenied",
|
||||
"InvalidAccessKeyId",
|
||||
"SignatureDoesNotMatch",
|
||||
]:
|
||||
if status_code == 403 or error_code == "AccessDenied":
|
||||
raise InsufficientPermissionsError(
|
||||
f"Insufficient permissions to list objects in bucket '{self.bucket_name}'. "
|
||||
"Please check your bucket policy and/or IAM policy."
|
||||
)
|
||||
if status_code == 401 or error_code == "SignatureDoesNotMatch":
|
||||
raise CredentialExpiredError(
|
||||
"Provided blob storage credentials appear invalid or expired."
|
||||
)
|
||||
|
||||
raise CredentialExpiredError(
|
||||
f"Credential issue encountered ({error_code})."
|
||||
)
|
||||
|
||||
if error_code == "NoSuchBucket" or status_code == 404:
|
||||
raise ConnectorValidationError(
|
||||
f"Bucket '{self.bucket_name}' does not exist or cannot be found."
|
||||
)
|
||||
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected S3 client error (code={error_code}, status={status_code}): {e}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Catch-all for anything not captured by the above
|
||||
# Since we are unsure of the error and it may not disable the connector,
|
||||
# raise an unexpected error (does not disable connector)
|
||||
raise UnexpectedError(
|
||||
f"Unexpected error during blob storage settings validation: {e}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
credentials_dict = {
|
||||
|
||||
@@ -4,6 +4,8 @@ from datetime import timezone
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
|
||||
from onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET
|
||||
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
|
||||
@@ -16,12 +18,16 @@ from onyx.connectors.confluence.utils import build_confluence_document_id
|
||||
from onyx.connectors.confluence.utils import datetime_from_string
|
||||
from onyx.connectors.confluence.utils import extract_text_from_confluence_html
|
||||
from onyx.connectors.confluence.utils import validate_attachment_filetype
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
@@ -397,3 +403,34 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
callback.progress("retrieve_all_slim_documents", 1)
|
||||
|
||||
yield doc_metadata_list
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
if self._confluence_client is None:
|
||||
raise ConnectorMissingCredentialError("Confluence credentials not loaded.")
|
||||
|
||||
try:
|
||||
spaces = self._confluence_client.get_all_spaces(limit=1)
|
||||
except HTTPError as e:
|
||||
status_code = e.response.status_code if e.response else None
|
||||
if status_code == 401:
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Confluence credentials (HTTP 401)."
|
||||
)
|
||||
elif status_code == 403:
|
||||
raise InsufficientPermissionsError(
|
||||
"Insufficient permissions to access Confluence resources (HTTP 403)."
|
||||
)
|
||||
else:
|
||||
raise UnexpectedError(
|
||||
f"Unexpected Confluence error (status={status_code}): {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
raise UnexpectedError(
|
||||
f"Unexpected error while validating Confluence settings: {e}"
|
||||
)
|
||||
|
||||
if not spaces or not spaces.get("results"):
|
||||
raise ConnectorValidationError(
|
||||
"No Confluence spaces found. Either your credentials lack permissions, or "
|
||||
"there truly are no spaces in this Confluence instance."
|
||||
)
|
||||
|
||||
@@ -55,9 +55,8 @@ from onyx.connectors.zendesk.connector import ZendeskConnector
|
||||
from onyx.connectors.zulip.connector import ZulipConnector
|
||||
from onyx.db.connector import fetch_connector_by_id
|
||||
from onyx.db.credentials import backend_update_credential_json
|
||||
from onyx.db.credentials import fetch_credential_by_id_for_user
|
||||
from onyx.db.credentials import fetch_credential_by_id
|
||||
from onyx.db.models import Credential
|
||||
from onyx.db.models import User
|
||||
|
||||
|
||||
class ConnectorMissingException(Exception):
|
||||
@@ -184,23 +183,21 @@ def validate_ccpair_for_user(
|
||||
connector_id: int,
|
||||
credential_id: int,
|
||||
db_session: Session,
|
||||
user: User | None,
|
||||
tenant_id: str | None,
|
||||
) -> None:
|
||||
enforce_creation: bool = True,
|
||||
) -> bool:
|
||||
# Validate the connector settings
|
||||
connector = fetch_connector_by_id(connector_id, db_session)
|
||||
credential = fetch_credential_by_id_for_user(
|
||||
credential = fetch_credential_by_id(
|
||||
credential_id,
|
||||
user,
|
||||
db_session,
|
||||
get_editable=False,
|
||||
)
|
||||
|
||||
if not connector:
|
||||
raise ValueError("Connector not found")
|
||||
|
||||
if connector.source == DocumentSource.INGESTION_API:
|
||||
return
|
||||
return False
|
||||
|
||||
if not credential:
|
||||
raise ValueError("Credential not found")
|
||||
@@ -215,6 +212,10 @@ def validate_ccpair_for_user(
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
except Exception as e:
|
||||
raise ConnectorValidationError(str(e))
|
||||
if enforce_creation:
|
||||
raise ConnectorValidationError(str(e))
|
||||
else:
|
||||
return False
|
||||
|
||||
runnable_connector.validate_connector_settings()
|
||||
return True
|
||||
|
||||
@@ -305,6 +305,7 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
userId=user_email,
|
||||
fields=THREAD_FIELDS,
|
||||
id=thread["id"],
|
||||
continue_on_404_or_403=True,
|
||||
)
|
||||
# full_threads is an iterator containing a single thread
|
||||
# so we need to convert it to a list and grab the first element
|
||||
@@ -336,6 +337,7 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
userId=user_email,
|
||||
fields=THREAD_LIST_FIELDS,
|
||||
q=query,
|
||||
continue_on_404_or_403=True,
|
||||
):
|
||||
doc_batch.append(
|
||||
SlimDocument(
|
||||
|
||||
@@ -36,12 +36,16 @@ from onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTION
|
||||
from onyx.connectors.google_utils.shared_constants import SCOPE_DOC_URL
|
||||
from onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
|
||||
from onyx.connectors.google_utils.shared_constants import USER_FIELDS
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.retry_wrapper import retry_builder
|
||||
@@ -137,7 +141,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
"Please visit the docs for help with the new setup: "
|
||||
f"{SCOPE_DOC_URL}"
|
||||
)
|
||||
raise ValueError(
|
||||
raise ConnectorValidationError(
|
||||
"Google Drive connector received old input parameters. "
|
||||
"Please visit the docs for help with the new setup: "
|
||||
f"{SCOPE_DOC_URL}"
|
||||
@@ -151,7 +155,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
and not my_drive_emails
|
||||
and not shared_drive_urls
|
||||
):
|
||||
raise ValueError(
|
||||
raise ConnectorValidationError(
|
||||
"Nothing to index. Please specify at least one of the following: "
|
||||
"include_shared_drives, include_my_drives, include_files_shared_with_me, "
|
||||
"shared_folder_urls, or my_drive_emails"
|
||||
@@ -220,7 +224,15 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
return self._creds
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:
|
||||
self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
|
||||
try:
|
||||
self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
|
||||
except KeyError:
|
||||
logger.debug(f"Credentials: {credentials}") # TODO: remove
|
||||
raise ValueError(
|
||||
"Primary admin email missing, "
|
||||
"should not call this property "
|
||||
"before calling load_credentials"
|
||||
)
|
||||
|
||||
self._creds, new_creds_dict = get_google_creds(
|
||||
credentials=credentials,
|
||||
@@ -602,3 +614,70 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
if MISSING_SCOPES_ERROR_STR in str(e):
|
||||
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
|
||||
raise e
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""
|
||||
Validate that we can connect to Google Drive (and optionally Admin APIs) with the provided credentials.
|
||||
Attempts a small listing of files to confirm scope and access.
|
||||
|
||||
Raises:
|
||||
ConnectorMissingCredentialError: If no credentials are loaded.
|
||||
CredentialExpiredError: If credentials are invalid/expired (HTTP 401).
|
||||
InsufficientPermissionsError: If we lack the Drive scope or are otherwise denied (HTTP 403).
|
||||
ConnectorValidationError: Any other unexpected errors (e.g. missing domain, no files).
|
||||
"""
|
||||
|
||||
if self._creds is None:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"Google Drive credentials not loaded."
|
||||
)
|
||||
|
||||
if self._primary_admin_email is None:
|
||||
raise ConnectorValidationError(
|
||||
"Primary admin email not found in credentials. "
|
||||
"Ensure DB_CREDENTIALS_PRIMARY_ADMIN_KEY is set."
|
||||
)
|
||||
|
||||
try:
|
||||
# Try a minimal file listing to confirm we have scope and valid credentials
|
||||
drive_service = get_drive_service(self._creds, self._primary_admin_email)
|
||||
response = (
|
||||
drive_service.files().list(pageSize=1, fields="files(id)").execute()
|
||||
)
|
||||
|
||||
# If listing returns no files, that's OK for validation
|
||||
# but we've at least confirmed we have the necessary scopes and can connect.
|
||||
# If you *require* at least 1 file in Drive, you could handle that here.
|
||||
_ = response.get("files", [])
|
||||
|
||||
if isinstance(self._creds, ServiceAccountCredentials):
|
||||
retry_builder()(get_root_folder_id)(drive_service)
|
||||
|
||||
except HttpError as e:
|
||||
status_code = e.resp.status if e.resp else None
|
||||
if status_code == 401:
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Google Drive credentials (401)."
|
||||
)
|
||||
elif status_code == 403:
|
||||
# Could mean missing scopes or the account lacks permission
|
||||
raise InsufficientPermissionsError(
|
||||
"Google Drive app lacks required permissions (403). "
|
||||
"Please ensure the necessary scopes are granted and Drive "
|
||||
"apps are enabled."
|
||||
)
|
||||
else:
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected Google Drive error (status={status_code}): {e}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Check for scope-related hints from the error message
|
||||
if MISSING_SCOPES_ERROR_STR in str(e):
|
||||
raise InsufficientPermissionsError(
|
||||
"Google Drive credentials are missing required scopes. "
|
||||
f"{ONYX_SCOPE_INSTRUCTIONS}"
|
||||
)
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected error during Google Drive validation: {e}"
|
||||
)
|
||||
|
||||
@@ -87,16 +87,18 @@ class HubSpotConnector(LoadConnector, PollConnector):
|
||||
contact = api_client.crm.contacts.basic_api.get_by_id(
|
||||
contact_id=contact.id
|
||||
)
|
||||
associated_emails.append(contact.properties["email"])
|
||||
email = contact.properties.get("email")
|
||||
if email is not None:
|
||||
associated_emails.append(email)
|
||||
|
||||
if notes:
|
||||
for note in notes.results:
|
||||
note = api_client.crm.objects.notes.basic_api.get_by_id(
|
||||
note_id=note.id, properties=["content", "hs_body_preview"]
|
||||
)
|
||||
if note.properties["hs_body_preview"] is None:
|
||||
continue
|
||||
associated_notes.append(note.properties["hs_body_preview"])
|
||||
preview = note.properties.get("hs_body_preview")
|
||||
if preview is not None:
|
||||
associated_notes.append(preview)
|
||||
|
||||
associated_emails_str = " ,".join(associated_emails)
|
||||
associated_notes_str = " ".join(associated_notes)
|
||||
|
||||
@@ -148,7 +148,15 @@ class CheckpointConnector(BaseConnector):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class ConnectorValidationError(Exception):
|
||||
class ValidationError(Exception):
|
||||
"""General exception for validation errors."""
|
||||
|
||||
def __init__(self, message: str):
|
||||
self.message = message
|
||||
super().__init__(self.message)
|
||||
|
||||
|
||||
class ConnectorValidationError(ValidationError):
|
||||
"""General exception for connector validation errors."""
|
||||
|
||||
def __init__(self, message: str):
|
||||
@@ -156,7 +164,7 @@ class ConnectorValidationError(Exception):
|
||||
super().__init__(self.message)
|
||||
|
||||
|
||||
class UnexpectedError(Exception):
|
||||
class UnexpectedError(ValidationError):
|
||||
"""Raised when an unexpected error occurs during connector validation.
|
||||
|
||||
Unexpected errors don't necessarily mean the credential is invalid,
|
||||
|
||||
@@ -23,6 +23,7 @@ from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import Section
|
||||
@@ -670,12 +671,12 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
"Please try again later."
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
raise UnexpectedError(
|
||||
f"Unexpected Notion HTTP error (status={status_code}): {http_err}"
|
||||
) from http_err
|
||||
|
||||
except Exception as exc:
|
||||
raise Exception(
|
||||
raise UnexpectedError(
|
||||
f"Unexpected error during Notion settings validation: {exc}"
|
||||
)
|
||||
|
||||
|
||||
@@ -20,9 +20,13 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.interfaces import CheckpointConnector
|
||||
from onyx.connectors.interfaces import CheckpointOutput
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import ConnectorCheckpoint
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
@@ -82,14 +86,14 @@ def get_channels(
|
||||
get_public: bool = True,
|
||||
get_private: bool = True,
|
||||
) -> list[ChannelType]:
|
||||
"""Get all channels in the workspace"""
|
||||
"""Get all channels in the workspace."""
|
||||
channels: list[dict[str, Any]] = []
|
||||
channel_types = []
|
||||
if get_public:
|
||||
channel_types.append("public_channel")
|
||||
if get_private:
|
||||
channel_types.append("private_channel")
|
||||
# try getting private channels as well at first
|
||||
# Try fetching both public and private channels first:
|
||||
try:
|
||||
channels = _collect_paginated_channels(
|
||||
client=client,
|
||||
@@ -97,19 +101,18 @@ def get_channels(
|
||||
channel_types=channel_types,
|
||||
)
|
||||
except SlackApiError as e:
|
||||
logger.info(f"Unable to fetch private channels due to - {e}")
|
||||
logger.info("trying again without private channels")
|
||||
logger.info(f"Unable to fetch private channels due to: {e}")
|
||||
logger.info("Trying again without private channels.")
|
||||
if get_public:
|
||||
channel_types = ["public_channel"]
|
||||
else:
|
||||
logger.warning("No channels to fetch")
|
||||
logger.warning("No channels to fetch.")
|
||||
return []
|
||||
channels = _collect_paginated_channels(
|
||||
client=client,
|
||||
exclude_archived=exclude_archived,
|
||||
channel_types=channel_types,
|
||||
)
|
||||
|
||||
return channels
|
||||
|
||||
|
||||
@@ -666,6 +669,92 @@ class SlackConnector(SlimConnector, CheckpointConnector):
|
||||
)
|
||||
return checkpoint
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""
|
||||
1. Verifies the bot token is valid for the workspace (via auth_test).
|
||||
2. Ensures the bot has enough scope to list channels.
|
||||
3. Checks that every channel specified in self.channels exists.
|
||||
"""
|
||||
if self.client is None:
|
||||
raise ConnectorMissingCredentialError("Slack credentials not loaded.")
|
||||
|
||||
try:
|
||||
# 1) Validate connection to workspace
|
||||
auth_response = self.client.auth_test()
|
||||
if not auth_response.get("ok", False):
|
||||
error_msg = auth_response.get(
|
||||
"error", "Unknown error from Slack auth_test"
|
||||
)
|
||||
raise ConnectorValidationError(f"Failed Slack auth_test: {error_msg}")
|
||||
|
||||
# 2) Minimal test to confirm listing channels works
|
||||
test_resp = self.client.conversations_list(
|
||||
limit=1, types=["public_channel"]
|
||||
)
|
||||
if not test_resp.get("ok", False):
|
||||
error_msg = test_resp.get("error", "Unknown error from Slack")
|
||||
if error_msg == "invalid_auth":
|
||||
raise ConnectorValidationError(
|
||||
f"Invalid Slack bot token ({error_msg})."
|
||||
)
|
||||
elif error_msg == "not_authed":
|
||||
raise CredentialExpiredError(
|
||||
f"Invalid or expired Slack bot token ({error_msg})."
|
||||
)
|
||||
raise UnexpectedError(f"Slack API returned a failure: {error_msg}")
|
||||
|
||||
# 3) If channels are specified, verify each is accessible
|
||||
if self.channels:
|
||||
accessible_channels = get_channels(
|
||||
client=self.client,
|
||||
exclude_archived=True,
|
||||
get_public=True,
|
||||
get_private=True,
|
||||
)
|
||||
# For quick lookups by name or ID, build a map:
|
||||
accessible_channel_names = {ch["name"] for ch in accessible_channels}
|
||||
accessible_channel_ids = {ch["id"] for ch in accessible_channels}
|
||||
|
||||
for user_channel in self.channels:
|
||||
# If your connector expects channel "names" (e.g., "general"),
|
||||
# verify user_channel is in channel names. Otherwise, if you
|
||||
# expect channel "ids" (e.g., "C12345"), check accessible_channel_ids.
|
||||
if (
|
||||
user_channel not in accessible_channel_names
|
||||
and user_channel not in accessible_channel_ids
|
||||
):
|
||||
raise ConnectorValidationError(
|
||||
f"Channel '{user_channel}' not found or inaccessible in this workspace."
|
||||
)
|
||||
|
||||
except SlackApiError as e:
|
||||
slack_error = e.response.get("error", "")
|
||||
if slack_error == "missing_scope":
|
||||
# The needed scope is typically "channels:read" or "groups:read"
|
||||
# for reading channels. The error may contain more detail.
|
||||
raise InsufficientPermissionsError(
|
||||
"Slack bot token lacks the necessary scope to list/access channels. "
|
||||
"Please ensure your Slack app has 'channels:read' (and/or 'groups:read' for private channels)."
|
||||
)
|
||||
elif slack_error == "invalid_auth":
|
||||
raise CredentialExpiredError(
|
||||
f"Invalid Slack bot token ({slack_error})."
|
||||
)
|
||||
elif slack_error == "not_authed":
|
||||
raise CredentialExpiredError(
|
||||
f"Invalid or expired Slack bot token ({slack_error})."
|
||||
)
|
||||
else:
|
||||
raise UnexpectedError(
|
||||
f"Unexpected Slack error '{slack_error}' during settings validation."
|
||||
)
|
||||
except ConnectorValidationError as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
raise UnexpectedError(
|
||||
f"Unexpected error during Slack settings validation: {e}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os
|
||||
|
||||
@@ -5,6 +5,7 @@ from typing import Any
|
||||
|
||||
import msal # type: ignore
|
||||
from office365.graph_client import GraphClient # type: ignore
|
||||
from office365.runtime.client_request_exception import ClientRequestException # type: ignore
|
||||
from office365.teams.channels.channel import Channel # type: ignore
|
||||
from office365.teams.chats.messages.message import ChatMessage # type: ignore
|
||||
from office365.teams.team import Team # type: ignore
|
||||
@@ -12,10 +13,14 @@ from office365.teams.team import Team # type: ignore
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
@@ -279,6 +284,64 @@ class TeamsConnector(LoadConnector, PollConnector):
|
||||
end_datetime = datetime.fromtimestamp(end, timezone.utc)
|
||||
return self._fetch_from_teams(start=start_datetime, end=end_datetime)
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""
|
||||
Validate that we can connect to Microsoft Teams with the provided MSAL/Graph credentials
|
||||
and that we can see at least one Team. If the user has specified a list of Teams by name,
|
||||
confirm at least one of them is found.
|
||||
|
||||
Raises:
|
||||
ConnectorMissingCredentialError: If the Graph client is not yet set (missing credentials).
|
||||
CredentialExpiredError: If credentials appear invalid/expired (e.g. 401 Unauthorized).
|
||||
InsufficientPermissionsError: If the app lacks required permissions to read Teams.
|
||||
ConnectorValidationError: If no Teams are found, or if requested Teams are not found.
|
||||
"""
|
||||
if self.graph_client is None:
|
||||
raise ConnectorMissingCredentialError("Teams credentials not loaded.")
|
||||
|
||||
try:
|
||||
# Minimal call to confirm we can retrieve Teams
|
||||
found_teams = self._get_all_teams()
|
||||
|
||||
# You may optionally catch the Graph/Office365 request exception if available:
|
||||
except ClientRequestException as e:
|
||||
status_code = e.response.status_code
|
||||
if status_code == 401:
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Microsoft Teams credentials (401 Unauthorized)."
|
||||
)
|
||||
elif status_code == 403:
|
||||
raise InsufficientPermissionsError(
|
||||
"Your app lacks sufficient permissions to read Teams (403 Forbidden)."
|
||||
)
|
||||
else:
|
||||
raise UnexpectedError(f"Unexpected error retrieving teams: {e}")
|
||||
|
||||
except Exception as e:
|
||||
error_str = str(e).lower()
|
||||
if (
|
||||
"unauthorized" in error_str
|
||||
or "401" in error_str
|
||||
or "invalid_grant" in error_str
|
||||
):
|
||||
raise CredentialExpiredError(
|
||||
"Invalid or expired Microsoft Teams credentials."
|
||||
)
|
||||
elif "forbidden" in error_str or "403" in error_str:
|
||||
raise InsufficientPermissionsError(
|
||||
"App lacks required permissions to read from Microsoft Teams."
|
||||
)
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected error during Teams validation: {e}"
|
||||
)
|
||||
|
||||
# If we get this far, the Graph call succeeded. Check for presence of Teams:
|
||||
if not found_teams:
|
||||
raise ConnectorValidationError(
|
||||
"No Teams found for the given credentials. "
|
||||
"Either there are no Teams in this tenant, or your app does not have permission to view them."
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
connector = TeamsConnector(teams=os.environ["TEAMS"].split(","))
|
||||
|
||||
@@ -440,7 +440,10 @@ class WebConnector(LoadConnector):
|
||||
"No URL configured. Please provide at least one valid URL."
|
||||
)
|
||||
|
||||
if self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value:
|
||||
if (
|
||||
self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value
|
||||
or self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value
|
||||
):
|
||||
return None
|
||||
|
||||
# We'll just test the first URL for connectivity and correctness
|
||||
|
||||
@@ -194,9 +194,14 @@ def get_connector_credential_pair_from_id_for_user(
|
||||
def get_connector_credential_pair_from_id(
|
||||
db_session: Session,
|
||||
cc_pair_id: int,
|
||||
eager_load_credential: bool = False,
|
||||
) -> ConnectorCredentialPair | None:
|
||||
stmt = select(ConnectorCredentialPair).distinct()
|
||||
stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)
|
||||
|
||||
if eager_load_credential:
|
||||
stmt = stmt.options(joinedload(ConnectorCredentialPair.credential))
|
||||
|
||||
result = db_session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
@@ -396,8 +401,8 @@ def add_credential_to_connector(
|
||||
# If we are in the seeding flow, we shouldn't need to check if the credential belongs to the user
|
||||
if seeding_flow:
|
||||
credential = fetch_credential_by_id(
|
||||
db_session=db_session,
|
||||
credential_id=credential_id,
|
||||
db_session=db_session,
|
||||
)
|
||||
else:
|
||||
credential = fetch_credential_by_id_for_user(
|
||||
|
||||
@@ -169,8 +169,8 @@ def fetch_credential_by_id_for_user(
|
||||
|
||||
|
||||
def fetch_credential_by_id(
|
||||
db_session: Session,
|
||||
credential_id: int,
|
||||
db_session: Session,
|
||||
) -> Credential | None:
|
||||
stmt = select(Credential).distinct()
|
||||
stmt = stmt.where(Credential.id == credential_id)
|
||||
@@ -422,8 +422,8 @@ def create_initial_public_credential(db_session: Session) -> None:
|
||||
"There must exist an empty public credential for data connectors that do not require additional Auth."
|
||||
)
|
||||
first_credential = fetch_credential_by_id(
|
||||
db_session=db_session,
|
||||
credential_id=PUBLIC_CREDENTIAL_ID,
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
if first_credential is not None:
|
||||
|
||||
@@ -26,7 +26,7 @@ from onyx.background.indexing.models import IndexAttemptErrorPydantic
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.connectors.factory import validate_ccpair_for_user
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import ValidationError
|
||||
from onyx.db.connector import delete_connector
|
||||
from onyx.db.connector_credential_pair import add_credential_to_connector
|
||||
from onyx.db.connector_credential_pair import (
|
||||
@@ -620,9 +620,7 @@ def associate_credential_to_connector(
|
||||
)
|
||||
|
||||
try:
|
||||
validate_ccpair_for_user(
|
||||
connector_id, credential_id, db_session, user, tenant_id
|
||||
)
|
||||
validate_ccpair_for_user(connector_id, credential_id, db_session, tenant_id)
|
||||
|
||||
response = add_credential_to_connector(
|
||||
db_session=db_session,
|
||||
@@ -649,7 +647,7 @@ def associate_credential_to_connector(
|
||||
|
||||
return response
|
||||
|
||||
except ConnectorValidationError as e:
|
||||
except ValidationError as e:
|
||||
# If validation fails, delete the connector and commit the changes
|
||||
# Ensures we don't leave invalid connectors in the database
|
||||
# NOTE: consensus is that it makes sense to unify connector and ccpair creation flows
|
||||
@@ -660,7 +658,6 @@ def associate_credential_to_connector(
|
||||
raise HTTPException(
|
||||
status_code=400, detail="Connector validation error: " + str(e)
|
||||
)
|
||||
|
||||
except IntegrityError as e:
|
||||
logger.error(f"IntegrityError: {e}")
|
||||
raise HTTPException(status_code=400, detail="Name must be unique")
|
||||
|
||||
@@ -854,7 +854,6 @@ def create_connector_with_mock_credential(
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
response = add_credential_to_connector(
|
||||
|
||||
@@ -106,7 +106,6 @@ def swap_credentials_for_connector(
|
||||
credential_swap_req.connector_id,
|
||||
credential_swap_req.new_credential_id,
|
||||
db_session,
|
||||
user,
|
||||
tenant_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import { buildCCPairInfoUrl } from "./lib";
|
||||
import { setCCPairStatus } from "@/lib/ccPair";
|
||||
import { useState } from "react";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
|
||||
|
||||
export function ModifyStatusButtonCluster({
|
||||
ccPair,
|
||||
@@ -16,11 +17,24 @@ export function ModifyStatusButtonCluster({
|
||||
}) {
|
||||
const { popup, setPopup } = usePopup();
|
||||
const [isUpdating, setIsUpdating] = useState(false);
|
||||
const [showConfirmModal, setShowConfirmModal] = useState(false);
|
||||
|
||||
const handleStatusChange = async (
|
||||
newStatus: ConnectorCredentialPairStatus
|
||||
) => {
|
||||
if (isUpdating) return; // Prevent double-clicks or multiple requests
|
||||
|
||||
if (
|
||||
ccPair.status === ConnectorCredentialPairStatus.INVALID &&
|
||||
newStatus === ConnectorCredentialPairStatus.ACTIVE
|
||||
) {
|
||||
setShowConfirmModal(true);
|
||||
} else {
|
||||
await updateStatus(newStatus);
|
||||
}
|
||||
};
|
||||
|
||||
const updateStatus = async (newStatus: ConnectorCredentialPairStatus) => {
|
||||
setIsUpdating(true);
|
||||
|
||||
try {
|
||||
@@ -39,12 +53,14 @@ export function ModifyStatusButtonCluster({
|
||||
|
||||
// Compute the button text based on current state and backend status
|
||||
const buttonText =
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED ||
|
||||
ccPair.status === ConnectorCredentialPairStatus.INVALID
|
||||
? "Re-Enable"
|
||||
: "Pause";
|
||||
|
||||
const tooltip =
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED ||
|
||||
ccPair.status === ConnectorCredentialPairStatus.INVALID
|
||||
? "Click to start indexing again!"
|
||||
: "When paused, the connector's documents will still be visible. However, no new documents will be indexed.";
|
||||
|
||||
@@ -54,14 +70,16 @@ export function ModifyStatusButtonCluster({
|
||||
<Button
|
||||
className="flex items-center justify-center w-auto min-w-[100px] px-4 py-2"
|
||||
variant={
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED ||
|
||||
ccPair.status === ConnectorCredentialPairStatus.INVALID
|
||||
? "success-reverse"
|
||||
: "default"
|
||||
}
|
||||
disabled={isUpdating}
|
||||
onClick={() =>
|
||||
handleStatusChange(
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED ||
|
||||
ccPair.status === ConnectorCredentialPairStatus.INVALID
|
||||
? ConnectorCredentialPairStatus.ACTIVE
|
||||
: ConnectorCredentialPairStatus.PAUSED
|
||||
)
|
||||
@@ -71,7 +89,8 @@ export function ModifyStatusButtonCluster({
|
||||
{isUpdating ? (
|
||||
<LoadingAnimation
|
||||
text={
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED
|
||||
ccPair.status === ConnectorCredentialPairStatus.PAUSED ||
|
||||
ccPair.status === ConnectorCredentialPairStatus.INVALID
|
||||
? "Resuming"
|
||||
: "Pausing"
|
||||
}
|
||||
@@ -81,6 +100,20 @@ export function ModifyStatusButtonCluster({
|
||||
buttonText
|
||||
)}
|
||||
</Button>
|
||||
{showConfirmModal && (
|
||||
<ConfirmEntityModal
|
||||
entityType="Invalid Connector"
|
||||
entityName={ccPair.name}
|
||||
onClose={() => setShowConfirmModal(false)}
|
||||
onSubmit={() => {
|
||||
setShowConfirmModal(false);
|
||||
updateStatus(ConnectorCredentialPairStatus.ACTIVE);
|
||||
}}
|
||||
additionalDetails="This connector was previously marked as invalid. Please verify that your configuration is correct before re-enabling. Are you sure you want to proceed?"
|
||||
actionButtonText="Re-Enable"
|
||||
variant="action"
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -123,7 +123,8 @@ export function ReIndexButton({
|
||||
disabled={
|
||||
isDisabled ||
|
||||
ccPairStatus == ConnectorCredentialPairStatus.DELETING ||
|
||||
ccPairStatus == ConnectorCredentialPairStatus.PAUSED
|
||||
ccPairStatus == ConnectorCredentialPairStatus.PAUSED ||
|
||||
ccPairStatus == ConnectorCredentialPairStatus.INVALID
|
||||
}
|
||||
tooltip={getCCPairStatusMessage(isDisabled, isIndexing, ccPairStatus)}
|
||||
>
|
||||
|
||||
@@ -168,7 +168,7 @@ export default function CredentialSection({
|
||||
onClick={() => {
|
||||
setShowModifyCredential(true);
|
||||
}}
|
||||
className="flex items-center gap-x-2 cursor-pointer bg-background-100 border-border border-2 hover:bg-border p-1.5 rounded-lg text-text-700"
|
||||
className="flex items-center gap-x-2 cursor-pointer bg-neutral-800 border-neutral-600 border-2 hover:bg-neutral-700 p-1.5 rounded-lg text-neutral-300"
|
||||
>
|
||||
<FaSwatchbook />
|
||||
Update Credentials
|
||||
|
||||
@@ -37,7 +37,7 @@ export const ConfirmEntityModal = ({
|
||||
};
|
||||
|
||||
return (
|
||||
<Modal width="rounded max-w-sm w-full" onOutsideClick={onClose}>
|
||||
<Modal width="rounded max-w-md w-full" onOutsideClick={onClose}>
|
||||
<>
|
||||
<div className="flex mb-4">
|
||||
<h2 className="my-auto text-2xl font-bold">
|
||||
|
||||
Reference in New Issue
Block a user