Compare commits

...

8 Commits

Author SHA1 Message Date
pablonyx
5e75caf79d finalize 2025-02-18 12:43:14 -08:00
pablonyx
25223748d6 update 2025-02-18 11:20:11 -08:00
pablonyx
1fb8142160 k 2025-02-17 19:28:05 -08:00
pablonyx
d46bc78739 Additional connector validation classes (#4018) 2025-02-17 19:16:49 -08:00
pablonyx
620c205aad k 2025-02-17 19:16:48 -08:00
pablonyx
47ea740ab3 pretty 2025-02-17 19:16:48 -08:00
pablonyx
dc151744e7 minor cleanup 2025-02-17 19:16:48 -08:00
pablonyx
df0eb597ff Connector validation 2025-02-17 19:16:48 -08:00
33 changed files with 738 additions and 110 deletions

View File

@@ -0,0 +1,29 @@
"""remove inactive ccpair status on downgrade
Revision ID: acaab4ef4507
Revises: b7a7eee5aa15
Create Date: 2025-02-16 18:21:41.330212
"""
from alembic import op
from onyx.db.models import ConnectorCredentialPair
from onyx.db.enums import ConnectorCredentialPairStatus
from sqlalchemy import update
# revision identifiers, used by Alembic.
revision = "acaab4ef4507"
down_revision = "b7a7eee5aa15"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Do nothing on upgrade; this revision only carries a downgrade step."""
    return None
def downgrade() -> None:
    """Rewrite every connector-credential pair whose status is INVALID to ACTIVE."""
    # Build the statement first so the filter and the new value read separately.
    is_invalid = ConnectorCredentialPair.status == ConnectorCredentialPairStatus.INVALID
    reset_statement = (
        update(ConnectorCredentialPair)
        .where(is_invalid)
        .values(status=ConnectorCredentialPairStatus.ACTIVE)
    )
    op.execute(reset_statement)

View File

@@ -47,6 +47,7 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector_credential_pair import fetch_connector_credential_pairs
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
@@ -103,6 +104,9 @@ class IndexingWatchdogTerminalStatus(str, Enum):
"index_attempt_mismatch" # expected index attempt metadata not found in db
)
CONNECTOR_VALIDATION_ERROR = (
"connector_validation_error" # the connector validation failed
)
CONNECTOR_EXCEPTIONED = "connector_exceptioned" # the connector itself exceptioned
WATCHDOG_EXCEPTIONED = "watchdog_exceptioned" # the watchdog exceptioned
@@ -121,6 +125,7 @@ class IndexingWatchdogTerminalStatus(str, Enum):
_ENUM_TO_CODE: dict[IndexingWatchdogTerminalStatus, int] = {
IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL: -9,
IndexingWatchdogTerminalStatus.OUT_OF_MEMORY: 137,
IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR: 247,
IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION: 248,
IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL: 249,
IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND: 250,
@@ -137,6 +142,7 @@ class IndexingWatchdogTerminalStatus(str, Enum):
def from_code(cls, code: int) -> "IndexingWatchdogTerminalStatus":
_CODE_TO_ENUM: dict[int, IndexingWatchdogTerminalStatus] = {
-9: IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL,
247: IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR,
248: IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION,
249: IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL,
250: IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND,
@@ -788,6 +794,15 @@ def connector_indexing_task(
# get back the total number of indexed docs and return it
n_final_progress = redis_connector_index.get_progress()
redis_connector_index.set_generator_complete(HTTPStatus.OK.value)
except ConnectorValidationError:
raise SimpleJobException(
f"Indexing task failed: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}",
code=IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR.code,
)
except Exception as e:
logger.exception(
f"Indexing spawned task failed: attempt={index_attempt_id} "
@@ -795,8 +810,8 @@ def connector_indexing_task(
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
raise e
finally:
if lock.owned():
lock.release()
@@ -996,9 +1011,13 @@ def connector_indexing_proxy_task(
)
)
continue
except Exception:
except Exception as e:
result.status = IndexingWatchdogTerminalStatus.WATCHDOG_EXCEPTIONED
result.exception_str = traceback.format_exc()
if isinstance(e, ConnectorValidationError):
# No need to expose full stack trace for validation errors
result.exception_str = str(e)
else:
result.exception_str = traceback.format_exc()
# handle exit and reporting
elapsed = time.monotonic() - start

View File

@@ -425,6 +425,7 @@ def connector_pruning_generator_task(
f"cc_pair={cc_pair_id} "
f"connector_source={cc_pair.connector.source}"
)
runnable_connector = instantiate_connector(
db_session,
cc_pair.connector.source,

View File

@@ -21,6 +21,7 @@ from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MilestoneRecordType
from onyx.connectors.connector_runner import ConnectorRunner
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
@@ -86,6 +87,11 @@ def _get_connector_runner(
credential=attempt.connector_credential_pair.credential,
tenant_id=tenant_id,
)
# validate the connector settings
runnable_connector.validate_connector_settings()
except Exception as e:
logger.exception(f"Unable to instantiate connector due to {e}")
@@ -567,8 +573,28 @@ def _run_indexing(
"Connector run exceptioned after elapsed time: "
f"{time.monotonic() - start_time} seconds"
)
if isinstance(e, ConnectorValidationError):
# On validation errors during indexing, we want to cancel the indexing attempt
# and mark the CCPair as invalid. This prevents the connector from being
# used in the future until the credentials are updated.
with get_session_with_tenant(tenant_id) as db_session_temp:
mark_attempt_canceled(
index_attempt_id,
db_session_temp,
reason=str(e),
)
if isinstance(e, ConnectorStopSignal):
if ctx.is_primary:
update_connector_credential_pair(
db_session=db_session_temp,
connector_id=ctx.connector_id,
credential_id=ctx.credential_id,
status=ConnectorCredentialPairStatus.INVALID,
)
memory_tracer.stop()
raise e
elif isinstance(e, ConnectorStopSignal):
with get_session_with_tenant(tenant_id) as db_session_temp:
mark_attempt_canceled(
index_attempt_id,

View File

@@ -5,6 +5,8 @@ import requests
class BookStackClientRequestFailedError(ConnectionError):
def __init__(self, status: int, error: str) -> None:
self.status_code = status
self.error = error
super().__init__(
"BookStack Client request failed with status {status}: {error}".format(
status=status, error=error

View File

@@ -7,8 +7,12 @@ from typing import Any
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.bookstack.client import BookStackApiClient
from onyx.connectors.bookstack.client import BookStackClientRequestFailedError
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialExpiredError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -214,3 +218,39 @@ class BookstackConnector(LoadConnector, PollConnector):
break
else:
time.sleep(0.2)
def validate_connector_settings(self) -> None:
    """Verify that an authenticated request to BookStack succeeds.

    Raises:
        ConnectorMissingCredentialError: no BookStack client is set.
        CredentialExpiredError: BookStack answered with HTTP 401.
        InsufficientPermissionsError: BookStack answered with HTTP 403.
        ConnectorValidationError: any other request failure or error.
    """
    client = self.bookstack_client
    if not client:
        raise ConnectorMissingCredentialError(
            "BookStack credentials have not been loaded."
        )

    try:
        # The endpoint is arbitrary; one book is enough to exercise the credentials.
        client.get("/books", params={"count": "1", "offset": "0"})
    except BookStackClientRequestFailedError as request_error:
        http_status = request_error.status_code
        if http_status == 401:
            raise CredentialExpiredError(
                "Your BookStack credentials appear to be invalid or expired (HTTP 401)."
            ) from request_error
        if http_status == 403:
            raise InsufficientPermissionsError(
                "The configured BookStack token does not have sufficient permissions (HTTP 403)."
            ) from request_error
        raise ConnectorValidationError(
            f"Unexpected BookStack error (status={http_status}): {request_error}"
        ) from request_error
    except Exception as unexpected:
        raise ConnectorValidationError(
            f"Unexpected error while validating BookStack connector settings: {unexpected}"
        ) from unexpected

View File

@@ -4,12 +4,16 @@ from typing import Any
from dropbox import Dropbox # type: ignore
from dropbox.exceptions import ApiError # type:ignore
from dropbox.exceptions import AuthError # type:ignore
from dropbox.files import FileMetadata # type:ignore
from dropbox.files import FolderMetadata # type:ignore
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialInvalidError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -141,6 +145,29 @@ class DropboxConnector(LoadConnector, PollConnector):
return None
def validate_connector_settings(self) -> None:
    """Verify the Dropbox client by listing a single entry of the root folder.

    Every translated error is raised ``from`` the SDK exception so the
    original cause stays attached to the traceback.

    Raises:
        ConnectorMissingCredentialError: no Dropbox client is set.
        CredentialInvalidError: the SDK raised ``AuthError``.
        InsufficientPermissionsError: the ``ApiError`` mentions
            ``insufficient_permissions``.
        ConnectorValidationError: any other ``ApiError``.
        Exception: anything else that went wrong during the check.
    """
    if self.dropbox_client is None:
        raise ConnectorMissingCredentialError("Dropbox credentials not loaded.")

    try:
        self.dropbox_client.files_list_folder(path="", limit=1)
    except AuthError as e:
        logger.exception("Failed to validate Dropbox credentials")
        raise CredentialInvalidError(
            f"Dropbox credential is invalid: {e.error}"
        ) from e
    except ApiError as e:
        # The permission problem is only recognisable from the error's text.
        if (
            e.error is not None
            and "insufficient_permissions" in str(e.error).lower()
        ):
            raise InsufficientPermissionsError(
                "Your Dropbox token does not have sufficient permissions."
            ) from e
        raise ConnectorValidationError(
            f"Unexpected Dropbox error during validation: {e.user_message_text or e}"
        ) from e
    except Exception as e:
        raise Exception(
            f"Unexpected error during Dropbox settings validation: {e}"
        ) from e
if __name__ == "__main__":
import os

View File

@@ -31,6 +31,7 @@ from onyx.connectors.guru.connector import GuruConnector
from onyx.connectors.hubspot.connector import HubSpotConnector
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointConnector
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import EventConnector
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
@@ -52,8 +53,11 @@ from onyx.connectors.wikipedia.connector import WikipediaConnector
from onyx.connectors.xenforo.connector import XenforoConnector
from onyx.connectors.zendesk.connector import ZendeskConnector
from onyx.connectors.zulip.connector import ZulipConnector
from onyx.db.connector import fetch_connector_by_id
from onyx.db.credentials import backend_update_credential_json
from onyx.db.credentials import fetch_credential_by_id_for_user
from onyx.db.models import Credential
from onyx.db.models import User
class ConnectorMissingException(Exception):
@@ -174,3 +178,39 @@ def instantiate_connector(
backend_update_credential_json(credential, new_credentials, db_session)
return connector
def validate_ccpair_for_user(
    connector_id: int,
    credential_id: int,
    db_session: Session,
    user: User | None,
    tenant_id: str | None,
) -> None:
    """Instantiate the connector with the given credential and validate it.

    Args:
        connector_id: id of the connector to load.
        credential_id: id of the credential, fetched on behalf of ``user``.
        db_session: session used for both lookups and for instantiation.
        user: user the credential lookup is performed for.
        tenant_id: tenant passed through to ``instantiate_connector``.

    Raises:
        ValueError: the credential or the connector could not be found.
        ConnectorValidationError: instantiation failed (the original error is
            chained as the cause), or the connector's own validation raised it.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    credential = fetch_credential_by_id_for_user(
        credential_id,
        user,
        db_session,
        get_editable=False,
    )
    if not credential:
        raise ValueError("Credential not found")
    if not connector:
        raise ValueError("Connector not found")

    try:
        runnable_connector = instantiate_connector(
            db_session=db_session,
            source=connector.source,
            input_type=connector.input_type,
            connector_specific_config=connector.connector_specific_config,
            credential=credential,
            tenant_id=tenant_id,
        )
    except Exception as e:
        # Any instantiation failure is reported as a validation error; keep
        # the underlying exception attached so it is not lost for debugging.
        raise ConnectorValidationError(str(e)) from e

    runnable_connector.validate_connector_settings()

View File

@@ -9,6 +9,7 @@ from typing import cast
from github import Github
from github import RateLimitExceededException
from github import Repository
from github.GithubException import GithubException
from github.Issue import Issue
from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest
@@ -16,7 +17,10 @@ from github.PullRequest import PullRequest
from onyx.configs.app_configs import GITHUB_CONNECTOR_BASE_URL
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialExpiredError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -26,7 +30,6 @@ from onyx.connectors.models import Section
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -226,6 +229,48 @@ class GithubConnector(LoadConnector, PollConnector):
return self._fetch_from_github(adjusted_start_datetime, end_datetime)
def validate_connector_settings(self) -> None:
    """Verify the GitHub client can read the configured repository.

    Every translated error is raised ``from`` the original exception so the
    underlying cause stays attached to the traceback.

    Raises:
        ConnectorMissingCredentialError: no GitHub client is set.
        CredentialExpiredError: GitHub answered with HTTP 401.
        InsufficientPermissionsError: GitHub answered with HTTP 403.
        ConnectorValidationError: owner/name missing, rate limit exceeded,
            repository not found (404), or another GitHub API error.
        Exception: anything else that went wrong during the check.
    """
    if self.github_client is None:
        raise ConnectorMissingCredentialError("GitHub credentials not loaded.")

    if not self.repo_owner or not self.repo_name:
        raise ConnectorValidationError(
            "Invalid connector settings: 'repo_owner' and 'repo_name' must be provided."
        )

    try:
        test_repo = self.github_client.get_repo(
            f"{self.repo_owner}/{self.repo_name}"
        )
        # Reading the root listing checks content access, not just repo lookup.
        test_repo.get_contents("")
    except RateLimitExceededException as e:
        raise ConnectorValidationError(
            "Validation failed due to GitHub rate-limits being exceeded. Please try again later."
        ) from e
    except GithubException as e:
        if e.status == 401:
            raise CredentialExpiredError(
                "GitHub credential appears to be invalid or expired (HTTP 401)."
            ) from e
        elif e.status == 403:
            raise InsufficientPermissionsError(
                "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
            ) from e
        elif e.status == 404:
            raise ConnectorValidationError(
                f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
            ) from e
        else:
            raise ConnectorValidationError(
                f"Unexpected GitHub error (status={e.status}): {e.data}"
            ) from e
    except Exception as exc:
        raise Exception(
            f"Unexpected error during GitHub settings validation: {exc}"
        ) from exc
if __name__ == "__main__":
import os

View File

@@ -12,7 +12,6 @@ from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
SecondsSinceUnixEpoch = float
GenerateDocumentsOutput = Iterator[list[Document]]
@@ -45,6 +44,14 @@ class BaseConnector(abc.ABC):
raise RuntimeError(custom_parser_req_msg)
return metadata_lines
def validate_connector_settings(self) -> None:
    """Validation hook; the base implementation accepts everything.

    Connectors that can check their credentials or settings override this
    and raise an exception when something is invalid.
    """
    return None
# Large set update or reindex, generally pulling a complete state or from a savestate file
class LoadConnector(BaseConnector):
@@ -139,3 +146,46 @@ class CheckpointConnector(BaseConnector):
```
"""
raise NotImplementedError
class ConnectorValidationError(Exception):
    """Generic failure raised while validating a connector."""

    def __init__(self, message: str):
        super().__init__(message)
        # Also exposed as an attribute, in addition to ``args[0]``.
        self.message = message
class UnexpectedError(Exception):
    """Something unforeseen went wrong while a connector was being validated.

    This does not by itself say the credential is bad: the validation
    process may have failed, or an error case is not handled yet.
    """

    def __init__(self, message: str = "Unexpected error during connector validation"):
        super(UnexpectedError, self).__init__(message)
class CredentialInvalidError(ConnectorValidationError):
    """Validation error for a credential that is not valid."""

    def __init__(self, message: str = "Credential is invalid"):
        super().__init__(message=message)
class CredentialExpiredError(ConnectorValidationError):
    """Validation error for a credential that has expired."""

    def __init__(self, message: str = "Credential has expired"):
        super().__init__(message=message)
class InsufficientPermissionsError(ConnectorValidationError):
    """Validation error for a credential lacking the needed API permissions."""

    def __init__(
        self,
        message: str = "Insufficient permissions for the requested operation",
    ):
        super().__init__(message=message)

View File

@@ -7,6 +7,7 @@ from datetime import timezone
from typing import Any
from typing import Optional
import requests
from retry import retry
from onyx.configs.app_configs import INDEX_BATCH_SIZE
@@ -15,10 +16,14 @@ from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
rl_requests,
)
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialExpiredError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.utils.batching import batch_generator
@@ -616,6 +621,64 @@ class NotionConnector(LoadConnector, PollConnector):
else:
break
def validate_connector_settings(self) -> None:
    """Verify the Notion token with one minimal API call.

    If ``root_page_id`` is set, that page is fetched; otherwise a search
    limited to one page is issued.

    Raises:
        ConnectorMissingCredentialError: no Authorization header is set.
        CredentialExpiredError: Notion answered with HTTP 401.
        InsufficientPermissionsError: Notion answered with HTTP 403.
        ConnectorValidationError: HTTP 404 (not found / not shared) or
            HTTP 429 (rate limited).
        Exception: any other HTTP status or unexpected failure.
    """
    if not self.headers.get("Authorization"):
        raise ConnectorMissingCredentialError("Notion credentials not loaded.")

    try:
        # We'll do a minimal call to confirm accessibility
        if self.root_page_id:
            # If root_page_id is set, fetch the specific page
            res = rl_requests.get(
                f"https://api.notion.com/v1/pages/{self.root_page_id}",
                headers=self.headers,
                timeout=_NOTION_CALL_TIMEOUT,
            )
        else:
            # If root_page_id is not set, perform a minimal search
            test_query = {
                "filter": {"property": "object", "value": "page"},
                "page_size": 1,
            }
            res = rl_requests.post(
                "https://api.notion.com/v1/search",
                headers=self.headers,
                json=test_query,
                timeout=_NOTION_CALL_TIMEOUT,
            )
        res.raise_for_status()

    except requests.exceptions.HTTPError as http_err:
        # A requests.Response is falsy whenever its status is 4xx/5xx, so a
        # plain truthiness check would discard exactly the responses we need
        # to inspect. Compare against None instead.
        response = http_err.response
        status_code = response.status_code if response is not None else None

        if status_code == 401:
            raise CredentialExpiredError(
                "Notion credential appears to be invalid or expired (HTTP 401)."
            ) from http_err
        elif status_code == 403:
            raise InsufficientPermissionsError(
                "Your Notion token does not have sufficient permissions (HTTP 403)."
            ) from http_err
        elif status_code == 404:
            # Typically means resource not found or not shared. Could be root_page_id is invalid.
            raise ConnectorValidationError(
                "Notion resource not found or not shared with the integration (HTTP 404)."
            ) from http_err
        elif status_code == 429:
            raise ConnectorValidationError(
                "Validation failed due to Notion rate-limits being exceeded (HTTP 429). "
                "Please try again later."
            ) from http_err
        else:
            raise Exception(
                f"Unexpected Notion HTTP error (status={status_code}): {http_err}"
            ) from http_err

    except Exception as exc:
        raise Exception(
            f"Unexpected error during Notion settings validation: {exc}"
        ) from exc
if __name__ == "__main__":
import os

View File

@@ -12,8 +12,11 @@ from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialExpiredError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -272,6 +275,40 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
yield slim_doc_batch
def validate_connector_settings(self) -> None:
    """Verify the Jira client can look up the configured project.

    Every translated error is raised ``from`` the original exception so the
    underlying cause stays attached to the traceback.

    Raises:
        ConnectorMissingCredentialError: no Jira client is set.
        CredentialExpiredError: the failure carries status 401.
        InsufficientPermissionsError: the failure carries status 403.
        ConnectorValidationError: project key missing, project not found
            (404), or rate limited (429).
        Exception: any other failure of the project lookup.
    """
    if self._jira_client is None:
        raise ConnectorMissingCredentialError("Jira")

    if not self._jira_project:
        raise ConnectorValidationError(
            "Invalid connector settings: 'jira_project' must be provided."
        )

    try:
        self.jira_client.project(self._jira_project)
    except Exception as e:
        # Not every exception carries an HTTP status; fall back to None.
        status_code = getattr(e, "status_code", None)

        if status_code == 401:
            raise CredentialExpiredError(
                "Jira credential appears to be expired or invalid (HTTP 401)."
            ) from e
        elif status_code == 403:
            raise InsufficientPermissionsError(
                "Your Jira token does not have sufficient permissions for this project (HTTP 403)."
            ) from e
        elif status_code == 404:
            raise ConnectorValidationError(
                f"Jira project not found with key: {self._jira_project}"
            ) from e
        elif status_code == 429:
            raise ConnectorValidationError(
                "Validation failed due to Jira rate-limits being exceeded. Please try again later."
            ) from e
        else:
            raise Exception(f"Unexpected Jira error during validation: {e}") from e
if __name__ == "__main__":
import os

View File

@@ -25,8 +25,12 @@ from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
from onyx.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialExpiredError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import UnexpectedError
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.file_processing.extract_file_text import read_pdf_file
@@ -170,26 +174,35 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]:
def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
response = requests.get(sitemap_url)
response.raise_for_status()
try:
response = requests.get(sitemap_url)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
urls = [
_ensure_absolute_url(sitemap_url, loc_tag.text)
for loc_tag in soup.find_all("loc")
]
soup = BeautifulSoup(response.content, "html.parser")
urls = [
_ensure_absolute_url(sitemap_url, loc_tag.text)
for loc_tag in soup.find_all("loc")
]
if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
# the given url doesn't look like a sitemap, let's try to find one
urls = list_pages_for_site(sitemap_url)
if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
# the given url doesn't look like a sitemap, let's try to find one
urls = list_pages_for_site(sitemap_url)
if len(urls) == 0:
raise ValueError(
f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
if len(urls) == 0:
raise ValueError(
f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
)
return urls
except requests.RequestException as e:
raise RuntimeError(f"Failed to fetch sitemap from {sitemap_url}: {str(e)}")
except ValueError as e:
raise RuntimeError(f"Error processing sitemap {sitemap_url}: {str(e)}")
except Exception as e:
raise RuntimeError(
f"Unexpected error while processing sitemap {sitemap_url}: {str(e)}"
)
return urls
def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
if not urlparse(maybe_relative_url).netloc:
@@ -229,6 +242,7 @@ class WebConnector(LoadConnector):
self.mintlify_cleanup = mintlify_cleanup
self.batch_size = batch_size
self.recursive = False
self.web_connector_type = web_connector_type
if web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value:
self.recursive = True
@@ -402,6 +416,53 @@ class WebConnector(LoadConnector):
raise RuntimeError(last_error)
raise RuntimeError("No valid pages found.")
def validate_connector_settings(self) -> None:
    """Check the first configured URL for safety and reachability.

    Sitemap-type connectors return right after the "at least one URL"
    check; no request is made for them here. Every translated error is
    raised ``from`` the original exception to keep the cause attached.

    Raises:
        ConnectorValidationError: no URL configured, protected-URL check
            failed, page not found, or the hostname could not be resolved.
        CredentialExpiredError: the connection error text contains "401".
        InsufficientPermissionsError: the connection error text contains "403".
        UnexpectedError: any other failure of the connectivity check.
    """
    # Make sure we have at least one valid URL to check
    if not self.to_visit_list:
        raise ConnectorValidationError(
            "No URL configured. Please provide at least one valid URL."
        )

    if self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value:
        return None

    # We'll just test the first URL for connectivity and correctness
    test_url = self.to_visit_list[0]

    # Check that the URL is allowed and well-formed
    try:
        protected_url_check(test_url)
    except ValueError as e:
        raise ConnectorValidationError(
            f"Protected URL check failed for '{test_url}': {e}"
        ) from e
    except ConnectionError as e:
        # Typically DNS or other network issues
        raise ConnectorValidationError(str(e)) from e

    # Make a quick request to see if we get a valid response
    try:
        check_internet_connection(test_url)
    except Exception as e:
        # The failure is classified from its message text.
        err_str = str(e)
        if "401" in err_str:
            raise CredentialExpiredError(
                f"Unauthorized access to '{test_url}': {e}"
            ) from e
        elif "403" in err_str:
            raise InsufficientPermissionsError(
                f"Forbidden access to '{test_url}': {e}"
            ) from e
        elif "404" in err_str:
            raise ConnectorValidationError(
                f"Page not found for '{test_url}': {e}"
            ) from e
        elif "Max retries exceeded" in err_str and "NameResolutionError" in err_str:
            raise ConnectorValidationError(
                f"Unable to resolve hostname for '{test_url}'. Please check the URL and your internet connection."
            ) from e
        else:
            # Could be a 5xx or another error, treat as unexpected
            raise UnexpectedError(
                f"Unexpected error validating '{test_url}': {e}"
            ) from e
if __name__ == "__main__":
connector = WebConnector("https://docs.onyx.app/")

View File

@@ -14,6 +14,7 @@ from onyx.configs.constants import DocumentSource
from onyx.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Credential
from onyx.db.models import Credential__UserGroup
@@ -245,6 +246,10 @@ def swap_credentials_connector(
existing_pair.credential_id = new_credential_id
existing_pair.credential = new_credential
# Update ccpair status if it's in INVALID state
if existing_pair.status == ConnectorCredentialPairStatus.INVALID:
existing_pair.status = ConnectorCredentialPairStatus.ACTIVE
# Commit the changes
db_session.commit()

View File

@@ -73,6 +73,7 @@ class ConnectorCredentialPairStatus(str, PyEnum):
ACTIVE = "ACTIVE"
PAUSED = "PAUSED"
DELETING = "DELETING"
INVALID = "INVALID"
def is_active(self) -> bool:
    """Return True when this status equals the ACTIVE member."""
    active_status = ConnectorCredentialPairStatus.ACTIVE
    return active_status == self

View File

@@ -25,6 +25,9 @@ from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.background.indexing.models import IndexAttemptErrorPydantic
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.db.connector import delete_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.connector_credential_pair import (
get_connector_credential_pair_from_id_for_user,
@@ -616,6 +619,10 @@ def associate_credential_to_connector(
)
try:
validate_ccpair_for_user(
connector_id, credential_id, db_session, user, tenant_id
)
response = add_credential_to_connector(
db_session=db_session,
user=user,
@@ -640,10 +647,30 @@ def associate_credential_to_connector(
)
return response
except ConnectorValidationError as e:
print("EXCEPTINO 1")
print(type(e))
# If validation fails, delete the connector and commit the changes
# Ensures we don't leave invalid connectors in the database
# NOTE: consensus is that it makes sense to unify connector and ccpair creation flows
# which would rid us of needing to handle cases like these
delete_connector(db_session, connector_id)
db_session.commit()
raise HTTPException(
status_code=400, detail="Connector validation error: " + str(e)
)
except IntegrityError as e:
logger.error(f"IntegrityError: {e}")
raise HTTPException(status_code=400, detail="Name must be unique")
except Exception as e:
logger.exception(f"Unexpected error: {e}")
raise HTTPException(status_code=500, detail="Unexpected error")
@router.delete("/connector/{connector_id}/credential/{credential_id}")
def dissociate_credential_from_connector(

View File

@@ -28,6 +28,7 @@ from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.connectors.google_utils.google_auth import (
get_google_oauth_creds,
)
@@ -61,6 +62,7 @@ from onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TO
from onyx.connectors.google_utils.shared_constants import (
GoogleOAuthAuthenticationMethod,
)
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.db.connector import create_connector
from onyx.db.connector import delete_connector
from onyx.db.connector import fetch_connector_by_id
@@ -850,11 +852,22 @@ def create_connector_with_mock_credential(
db_session=db_session,
)
# Store the created connector and credential IDs
connector_id = cast(int, connector_response.id)
credential_id = credential.id
validate_ccpair_for_user(
connector_id=connector_id,
credential_id=credential_id,
db_session=db_session,
user=user,
tenant_id=tenant_id,
)
response = add_credential_to_connector(
db_session=db_session,
user=user,
connector_id=cast(int, connector_response.id), # will aways be an int
credential_id=credential.id,
connector_id=connector_id,
credential_id=credential_id,
access_type=connector_data.access_type,
cc_pair_name=connector_data.name,
groups=connector_data.groups,
@@ -879,9 +892,12 @@ def create_connector_with_mock_credential(
properties=None,
db_session=db_session,
)
return response
except ConnectorValidationError as e:
raise HTTPException(
status_code=400, detail="Connector validation error: " + str(e)
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

View File

@@ -7,6 +7,7 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.credentials import alter_credential
from onyx.db.credentials import cleanup_gmail_credentials
from onyx.db.credentials import create_credential
@@ -17,6 +18,7 @@ from onyx.db.credentials import fetch_credentials_by_source_for_user
from onyx.db.credentials import fetch_credentials_for_user
from onyx.db.credentials import swap_credentials_connector
from onyx.db.credentials import update_credential
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.models import DocumentSource
from onyx.db.models import User
@@ -98,7 +100,16 @@ def swap_credentials_for_connector(
credential_swap_req: CredentialSwapRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id),
) -> StatusResponse:
validate_ccpair_for_user(
credential_swap_req.connector_id,
credential_swap_req.new_credential_id,
db_session,
user,
tenant_id,
)
connector_credential_pair = swap_credentials_connector(
new_credential_id=credential_swap_req.new_credential_id,
connector_id=credential_swap_req.connector_id,

40
check_bookstack_api.py Normal file
View File

@@ -0,0 +1,40 @@
import requests
def check_bookstack_api(base_url, token_id, token_secret, timeout=10):
    """Check whether a BookStack API token is accepted by the server.

    Sends a single GET request to ``<base_url>/api/books`` authenticated with
    the given token and prints a human-readable verdict.

    Args:
        base_url: Root URL of the BookStack instance; trailing slashes are
            stripped before the API path is appended.
        token_id: ID half of the API token.
        token_secret: Secret half of the API token.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        True if the server answered with HTTP 200; False for HTTP 401, any
        other status code, or a failed request (including a timeout).
    """
    # Construct the API endpoint URL
    api_url = f"{base_url.rstrip('/')}/api/books"

    # Set up the headers with the API credentials
    headers = {
        "Authorization": f"Token {token_id}:{token_secret}",
        "Accept": "application/json",
    }

    try:
        # Only the network call can raise RequestException, so keep the try
        # body limited to it.
        response = requests.get(api_url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"An error occurred while making the request: {e}")
        return False

    # Check the response status code
    if response.status_code == 200:
        print("API key is valid and not expired.")
        return True
    if response.status_code == 401:
        print("API key is invalid or has expired.")
        return False

    print(f"Unexpected response. Status code: {response.status_code}")
    print(f"Response content: {response.text}")
    return False
if __name__ == "__main__":
    import os
    import sys

    # Credentials must never be committed to source control, so the instance
    # URL and token are read from the environment instead of being hard-coded.
    base_url = os.environ.get("BOOKSTACK_BASE_URL", "")
    token_id = os.environ.get("BOOKSTACK_TOKEN_ID", "")
    token_secret = os.environ.get("BOOKSTACK_TOKEN_SECRET", "")

    if not (base_url and token_id and token_secret):
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(
            "Set BOOKSTACK_BASE_URL, BOOKSTACK_TOKEN_ID and "
            "BOOKSTACK_TOKEN_SECRET before running this script."
        )

    check_bookstack_api(base_url, token_id, token_secret)

View File

@@ -67,12 +67,13 @@ export default function UpgradingPage({
};
const statusOrder: Record<ValidStatuses, number> = useMemo(
() => ({
failed: 0,
canceled: 1,
completed_with_errors: 2,
not_started: 3,
in_progress: 4,
success: 5,
invalid: 0,
failed: 1,
canceled: 2,
completed_with_errors: 3,
not_started: 4,
in_progress: 5,
success: 6,
}),
[]
);

View File

@@ -4,9 +4,14 @@ import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup";
import { Button } from "@/components/ui/button";
import Text from "@/components/ui/text";
import { triggerIndexing } from "./lib";
import { mutate } from "swr";
import { buildCCPairInfoUrl, getTooltipMessage } from "./lib";
import { useState } from "react";
import { Modal } from "@/components/Modal";
import { Separator } from "@/components/ui/separator";
import { ConnectorCredentialPairStatus } from "./types";
import { CCPairStatus } from "@/components/Status";
import { getCCPairStatusMessage } from "@/lib/ccPair";
function ReIndexPopup({
connectorId,
@@ -83,16 +88,16 @@ export function ReIndexButton({
ccPairId,
connectorId,
credentialId,
isDisabled,
isIndexing,
isDeleting,
isDisabled,
ccPairStatus,
}: {
ccPairId: number;
connectorId: number;
credentialId: number;
isDisabled: boolean;
isIndexing: boolean;
isDeleting: boolean;
isDisabled: boolean;
ccPairStatus: ConnectorCredentialPairStatus;
}) {
const { popup, setPopup } = usePopup();
const [reIndexPopupVisible, setReIndexPopupVisible] = useState(false);
@@ -115,18 +120,14 @@ export function ReIndexButton({
onClick={() => {
setReIndexPopupVisible(true);
}}
disabled={isDisabled || isDeleting}
tooltip={
isDeleting
? "Cannot index while connector is deleting"
: isIndexing
? "Indexing is already in progress"
: isDisabled
? "Connector must be re-enabled before indexing"
: undefined
disabled={
isDisabled ||
ccPairStatus == ConnectorCredentialPairStatus.DELETING ||
ccPairStatus == ConnectorCredentialPairStatus.PAUSED
}
tooltip={getCCPairStatusMessage(isDisabled, isIndexing, ccPairStatus)}
>
Index
Re-Index
</Button>
</>
);

View File

@@ -40,3 +40,24 @@ export async function triggerIndexing(
}
mutate(buildCCPairInfoUrl(ccPairId));
}
/**
 * Pick the tooltip text explaining why indexing cannot be triggered.
 *
 * The flags are checked in a fixed priority order (invalid, deleting,
 * indexing, disabled) and the message for the first one that is set is
 * returned.
 *
 * @returns The message for the highest-priority flag that is true, or
 *   `undefined` when none of the flags is set.
 */
export function getTooltipMessage(
  isInvalid: boolean,
  isDeleting: boolean,
  isIndexing: boolean,
  isDisabled: boolean
): string | undefined {
  // Ordered from highest to lowest priority; the first active entry wins.
  const reasons: [boolean, string][] = [
    [
      isInvalid,
      "Connector is in an invalid state. Please update the credentials or configuration before re-indexing.",
    ],
    [isDeleting, "Cannot index while connector is deleting"],
    [isIndexing, "Indexing is already in progress"],
    [isDisabled, "Connector must be re-enabled before indexing"],
  ];

  const firstActive = reasons.find(([active]) => active);
  return firstActive ? firstActive[1] : undefined;
}

View File

@@ -43,6 +43,7 @@ import IndexAttemptErrorsModal from "./IndexAttemptErrorsModal";
import usePaginatedFetch from "@/hooks/usePaginatedFetch";
import { IndexAttemptSnapshot } from "@/lib/types";
import { Spinner } from "@/components/Spinner";
import { Callout } from "@/components/ui/callout";
// synchronize these validations with the SQLAlchemy connector class until we have a
// centralized schema for both frontend and backend
@@ -363,6 +364,7 @@ function Main({ ccPairId }: { ccPairId: number }) {
<div className="ml-auto flex gap-x-2">
<ReIndexButton
ccPairId={ccPair.id}
ccPairStatus={ccPair.status}
connectorId={ccPair.connector.id}
credentialId={ccPair.credential.id}
isDisabled={
@@ -370,7 +372,6 @@ function Main({ ccPairId }: { ccPairId: number }) {
ccPair.status === ConnectorCredentialPairStatus.PAUSED
}
isIndexing={ccPair.indexing}
isDeleting={isDeleting}
/>
{!isDeleting && <ModifyStatusButtonCluster ccPair={ccPair} />}
@@ -379,8 +380,7 @@ function Main({ ccPairId }: { ccPairId: number }) {
</div>
<CCPairStatus
status={ccPair.last_index_attempt_status || "not_started"}
disabled={ccPair.status === ConnectorCredentialPairStatus.PAUSED}
isDeleting={isDeleting}
ccPairStatus={ccPair.status}
/>
<div className="text-sm mt-1">
Creator:{" "}
@@ -424,6 +424,16 @@ function Main({ ccPairId }: { ccPairId: number }) {
/>
</>
)}
{ccPair.status === ConnectorCredentialPairStatus.INVALID && (
<div className="mt-2">
<Callout type="warning" title="Invalid Connector State">
This connector is in an invalid state. Please update your
credentials or create a new connector before re-indexing.
</Callout>
</div>
)}
<Separator />
<ConfigDisplay
connectorSpecificConfig={ccPair.connector.connector_specific_config}

View File

@@ -12,6 +12,7 @@ export enum ConnectorCredentialPairStatus {
ACTIVE = "ACTIVE",
PAUSED = "PAUSED",
DELETING = "DELETING",
INVALID = "INVALID",
}
export interface CCPairFullInfo {

View File

@@ -418,7 +418,7 @@ export default function AddConnector({
} else {
const errorData = await linkCredentialResponse.json();
setPopup({
message: errorData.message,
message: errorData.message || errorData.detail,
type: "error",
});
}

View File

@@ -159,6 +159,19 @@ function ConnectorRow({
Paused
</Badge>
);
} else if (
ccPairsIndexingStatus.cc_pair_status ===
ConnectorCredentialPairStatus.INVALID
) {
return (
<Badge
tooltip="Connector is in an invalid state. Please update the credentials or create a new connector."
circle
variant="invalid"
>
Invalid
</Badge>
);
}
// ACTIVE case

View File

@@ -8,58 +8,32 @@ interface HoverPopupProps {
style?: "basic" | "dark";
}
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
export const HoverPopup = ({
mainContent,
popupContent,
classNameModifications,
direction = "bottom",
style = "basic",
}: HoverPopupProps) => {
const [hovered, setHovered] = useState(false);
let popupDirectionClass;
let popupStyle = {};
switch (direction) {
case "left":
popupDirectionClass = "top-0 left-0 transform";
popupStyle = { transform: "translateX(calc(-100% - 5px))" };
break;
case "left-top":
popupDirectionClass = "bottom-0 left-0";
popupStyle = { transform: "translate(calc(-100% - 5px), 0)" };
break;
case "bottom":
popupDirectionClass = "top-0 left-0 mt-6 pt-2";
break;
case "top":
popupDirectionClass = "top-0 left-0 translate-y-[-100%] pb-2";
break;
}
return (
<div
className="relative flex"
onMouseEnter={() => {
setHovered(true);
}}
onMouseLeave={() => setHovered(false)}
>
{hovered && (
<div
className={`absolute ${popupDirectionClass} z-30`}
style={popupStyle}
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>{mainContent}</div>
</TooltipTrigger>
<TooltipContent
side={direction === "left-top" ? "left" : direction}
className={classNameModifications}
>
<div
className={
`px-3 py-2 rounded bg-background border border-border` +
(classNameModifications || "")
}
>
{popupContent}
</div>
</div>
)}
<div>{mainContent}</div>
</div>
{popupContent}
</TooltipContent>
</Tooltip>
</TooltipProvider>
);
};

View File

@@ -10,6 +10,7 @@ import {
FiPauseCircle,
} from "react-icons/fi";
import { HoverPopup } from "./HoverPopup";
import { ConnectorCredentialPairStatus } from "@/app/admin/connector/[ccPairId]/types";
export function IndexAttemptStatus({
status,
@@ -70,6 +71,12 @@ export function IndexAttemptStatus({
Canceled
</Badge>
);
} else if (status === "invalid") {
badge = (
<Badge variant="invalid" icon={FiAlertTriangle}>
Invalid
</Badge>
);
} else {
badge = (
<Badge variant="outline" icon={FiMinus}>
@@ -83,29 +90,33 @@ export function IndexAttemptStatus({
export function CCPairStatus({
status,
disabled,
isDeleting,
ccPairStatus,
size = "md",
}: {
status: ValidStatuses;
disabled: boolean;
isDeleting: boolean;
ccPairStatus: ConnectorCredentialPairStatus;
size?: "xs" | "sm" | "md" | "lg";
}) {
let badge;
if (isDeleting) {
if (ccPairStatus == ConnectorCredentialPairStatus.DELETING) {
badge = (
<Badge variant="destructive" icon={FiAlertTriangle}>
Deleting
</Badge>
);
} else if (disabled) {
} else if (ccPairStatus == ConnectorCredentialPairStatus.PAUSED) {
badge = (
<Badge variant="paused" icon={FiPauseCircle}>
Paused
</Badge>
);
} else if (ccPairStatus == ConnectorCredentialPairStatus.INVALID) {
badge = (
<Badge variant="invalid" icon={FiAlertTriangle}>
Invalid
</Badge>
);
} else if (status === "failed") {
badge = (
<Badge variant="destructive" icon={FiAlertTriangle}>

View File

@@ -79,14 +79,24 @@ export default function CredentialSection({
selectedCredential: Credential<any>,
connectorId: number
) => {
await swapCredential(selectedCredential.id, connectorId);
mutate(buildSimilarCredentialInfoURL(sourceType));
refresh();
const response = await swapCredential(selectedCredential.id, connectorId);
if (response.ok) {
mutate(buildSimilarCredentialInfoURL(sourceType));
refresh();
setPopup({
message: "Swapped credential succesfully!",
type: "success",
});
setPopup({
message: "Swapped credential successfully!",
type: "success",
});
} else {
const errorData = await response.json();
setPopup({
message: `Issue swapping credential: ${
errorData.detail || errorData.message || "Unknown error"
}`,
type: "error",
});
}
};
const onUpdateCredential = async (

View File

@@ -1,6 +1,11 @@
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
const badgeVariants = cva(
@@ -8,6 +13,8 @@ const badgeVariants = cva(
{
variants: {
variant: {
invalid:
"border-orange-200 bg-orange-50 text-orange-600 dark:border-orange-700 dark:bg-orange-900 dark:text-orange-50",
outline:
"border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-900 dark:text-neutral-50",
purple:
@@ -57,11 +64,13 @@ function Badge({
icon: Icon,
size = "sm",
circle,
tooltip,
...props
}: BadgeProps & {
icon?: React.ElementType;
size?: "sm" | "md" | "xs";
circle?: boolean;
tooltip?: string;
}) {
const sizeClasses = {
sm: "px-2.5 py-0.5 text-xs",
@@ -69,7 +78,7 @@ function Badge({
xs: "px-1.5 py-0.25 text-[.5rem]",
};
return (
const BadgeContent = (
<div
className={cn(
"flex-none inline-flex items-center whitespace-nowrap overflow-hidden",
@@ -98,6 +107,21 @@ function Badge({
<span className="truncate">{props.children}</span>
</div>
);
if (tooltip) {
return (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>{BadgeContent}</TooltipTrigger>
<TooltipContent>
<p>{tooltip}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
);
}
return BadgeContent;
}
export { Badge, badgeVariants };

View File

@@ -88,7 +88,6 @@ export interface ButtonProps
tooltip?: string;
reverse?: boolean;
}
const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
(
{
@@ -124,7 +123,9 @@ const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
return (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>{button}</TooltipTrigger>
<TooltipTrigger>
<div>{button}</div>
</TooltipTrigger>
<TooltipContent showTick={true}>
<p>{tooltip}</p>
</TooltipContent>

View File

@@ -46,3 +46,23 @@ export async function setCCPairStatus(
});
}
}
/**
 * Build the tooltip text describing why a connector cannot be indexed.
 *
 * The connector-credential-pair status is consulted first (INVALID, then
 * DELETING); after that the `isIndexing` flag and finally the `isDisabled`
 * flag are checked.
 *
 * @returns The first applicable message, or `undefined` when nothing
 *   applies.
 */
export const getCCPairStatusMessage = (
  isDisabled: boolean,
  isIndexing: boolean,
  ccPairStatus: ConnectorCredentialPairStatus
) => {
  // Status-driven messages take precedence over the boolean flags.
  switch (ccPairStatus) {
    case ConnectorCredentialPairStatus.INVALID:
      return "Connector is in an invalid state. Please update the credentials or configuration before re-indexing.";
    case ConnectorCredentialPairStatus.DELETING:
      return "Cannot index while connector is deleting";
  }

  if (isIndexing) {
    return "Indexing is already in progress";
  }

  return isDisabled
    ? "Connector must be re-enabled before indexing"
    : undefined;
};

View File

@@ -92,6 +92,7 @@ export type ValidInputTypes =
| "event"
| "slim_retrieval";
export type ValidStatuses =
| "invalid"
| "success"
| "completed_with_errors"
| "canceled"