Compare commits

..

1 Commits

Author SHA1 Message Date
pablonyx
8ea54e1f52 add connector validation 2025-02-19 10:44:08 -08:00
40 changed files with 157 additions and 547 deletions

View File

@@ -74,9 +74,7 @@ jobs:
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
playwright install chromium
playwright install-deps chromium
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors

View File

@@ -1,7 +1,7 @@
"""remove inactive ccpair status on downgrade
Revision ID: acaab4ef4507
Revises: b388730a2899
Revises: b7a7eee5aa15
Create Date: 2025-02-16 18:21:41.330212
"""
@@ -12,7 +12,7 @@ from sqlalchemy import update
# revision identifiers, used by Alembic.
revision = "acaab4ef4507"
down_revision = "b388730a2899"
down_revision = "b7a7eee5aa15"
branch_labels = None
depends_on = None

View File

@@ -1,27 +0,0 @@
"""Add composite index for last_modified and last_synced to document
Revision ID: f13db29f3101
Revises: b388730a2899
Create Date: 2025-02-18 22:48:11.511389
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "f13db29f3101"
down_revision = "acaab4ef4507"
branch_labels: str | None = None
depends_on: str | None = None
def upgrade() -> None:
op.create_index(
"ix_document_sync_status",
"document",
["last_modified", "last_synced"],
unique=False,
)
def downgrade() -> None:
op.drop_index("ix_document_sync_status", table_name="document")

View File

@@ -10,7 +10,6 @@ from onyx.configs.app_configs import SMTP_PORT
from onyx.configs.app_configs import SMTP_SERVER
from onyx.configs.app_configs import SMTP_USER
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import AuthType
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
from onyx.db.models import User
@@ -188,51 +187,23 @@ def send_subscription_cancellation_email(user_email: str) -> None:
send_email(user_email, subject, html_content, text_content)
def send_user_email_invite(
user_email: str, current_user: User, auth_type: AuthType
) -> None:
def send_user_email_invite(user_email: str, current_user: User) -> None:
subject = "Invitation to Join Onyx Organization"
heading = "You've Been Invited!"
# the exact action taken by the user, and thus the message, depends on the auth type
message = f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
if auth_type == AuthType.CLOUD:
message += (
"<p>To join the organization, please click the button below to set a password "
"or login with Google and complete your registration.</p>"
)
elif auth_type == AuthType.BASIC:
message += (
"<p>To join the organization, please click the button below to set a password "
"and complete your registration.</p>"
)
elif auth_type == AuthType.GOOGLE_OAUTH:
message += (
"<p>To join the organization, please click the button below to login with Google "
"and complete your registration.</p>"
)
elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:
message += (
"<p>To join the organization, please click the button below to"
" complete your registration.</p>"
)
else:
raise ValueError(f"Invalid auth type: {auth_type}")
message = (
f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
"<p>To join the organization, please click the button below to set a password "
"or login with Google and complete your registration.</p>"
)
cta_text = "Join Organization"
cta_link = f"{WEB_DOMAIN}/auth/signup?email={user_email}"
html_content = build_html_email(heading, message, cta_text, cta_link)
# text content is the fallback for clients that don't support HTML
# not as critical, so not having special cases for each auth type
text_content = (
f"You have been invited by {current_user.email} to join an organization on Onyx.\n"
"To join the organization, please visit the following link:\n"
f"{WEB_DOMAIN}/auth/signup?email={user_email}\n"
"You'll be asked to set a password or login with Google to complete your registration."
)
if auth_type == AuthType.CLOUD:
text_content += "You'll be asked to set a password or login with Google to complete your registration."
send_email(user_email, subject, html_content, text_content)

View File

@@ -95,7 +95,6 @@ from onyx.db.models import User
from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.telemetry import optional_telemetry
@@ -110,6 +109,11 @@ from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
class BasicAuthenticationError(HTTPException):
def __init__(self, detail: str):
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True

View File

@@ -140,7 +140,7 @@ def on_task_postrun(
f"{f'for tenant_id={tenant_id}' if tenant_id else ''}"
)
r = get_redis_client(tenant_id=tenant_id)
r = get_redis_client()
if task_id.startswith(RedisConnectorCredentialPair.PREFIX):
r.srem(RedisConnectorCredentialPair.get_taskset_key(), task_id)

View File

@@ -190,8 +190,7 @@ def create_chat_chain(
and previous_message.message_type == MessageType.ASSISTANT
and mainline_messages
):
if current_message.refined_answer_improvement:
mainline_messages[-1] = current_message
mainline_messages[-1] = current_message
else:
mainline_messages.append(current_message)

View File

@@ -142,15 +142,6 @@ class MessageResponseIDInfo(BaseModel):
reserved_assistant_message_id: int
class AgentMessageIDInfo(BaseModel):
level: int
message_id: int
class AgenticMessageResponseIDInfo(BaseModel):
agentic_message_ids: list[AgentMessageIDInfo]
class StreamingError(BaseModel):
error: str
stack_trace: str | None = None

View File

@@ -11,8 +11,6 @@ from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallExcep
from onyx.chat.answer import Answer
from onyx.chat.chat_utils import create_chat_chain
from onyx.chat.chat_utils import create_temporary_persona
from onyx.chat.models import AgenticMessageResponseIDInfo
from onyx.chat.models import AgentMessageIDInfo
from onyx.chat.models import AgentSearchPacket
from onyx.chat.models import AllCitations
from onyx.chat.models import AnswerPostInfo
@@ -310,7 +308,6 @@ ChatPacket = (
| CustomToolResponse
| MessageSpecificCitations
| MessageResponseIDInfo
| AgenticMessageResponseIDInfo
| StreamStopInfo
| AgentSearchPacket
)
@@ -1038,7 +1035,6 @@ def stream_chat_message_objects(
next_level = 1
prev_message = gen_ai_response_message
agent_answers = answer.llm_answer_by_level()
agentic_message_ids = []
while next_level in agent_answers:
next_answer = agent_answers[next_level]
info = info_by_subq[
@@ -1063,18 +1059,17 @@ def stream_chat_message_objects(
refined_answer_improvement=refined_answer_improvement,
is_agentic=True,
)
agentic_message_ids.append(
AgentMessageIDInfo(level=next_level, message_id=next_answer_message.id)
)
next_level += 1
prev_message = next_answer_message
logger.debug("Committing messages")
db_session.commit() # actually save user / assistant message
yield AgenticMessageResponseIDInfo(agentic_message_ids=agentic_message_ids)
msg_detail_response = translate_db_message_to_chat_message_detail(
gen_ai_response_message
)
yield translate_db_message_to_chat_message_detail(gen_ai_response_message)
yield msg_detail_response
except Exception as e:
error_msg = str(e)
logger.exception(error_msg)

View File

@@ -5,8 +5,6 @@ import requests
class BookStackClientRequestFailedError(ConnectionError):
def __init__(self, status: int, error: str) -> None:
self.status_code = status
self.error = error
super().__init__(
"BookStack Client request failed with status {status}: {error}".format(
status=status, error=error

View File

@@ -7,12 +7,8 @@ from typing import Any
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.bookstack.client import BookStackApiClient
from onyx.connectors.bookstack.client import BookStackClientRequestFailedError
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialExpiredError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -218,39 +214,3 @@ class BookstackConnector(LoadConnector, PollConnector):
break
else:
time.sleep(0.2)
def validate_connector_settings(self) -> None:
"""
Validate that the BookStack credentials and connector settings are correct.
Specifically checks that we can make an authenticated request to BookStack.
"""
if not self.bookstack_client:
raise ConnectorMissingCredentialError(
"BookStack credentials have not been loaded."
)
try:
# Attempt to fetch a small batch of books (arbitrary endpoint) to verify credentials
_ = self.bookstack_client.get(
"/books", params={"count": "1", "offset": "0"}
)
except BookStackClientRequestFailedError as e:
# Check for HTTP status codes
if e.status_code == 401:
raise CredentialExpiredError(
"Your BookStack credentials appear to be invalid or expired (HTTP 401)."
) from e
elif e.status_code == 403:
raise InsufficientPermissionsError(
"The configured BookStack token does not have sufficient permissions (HTTP 403)."
) from e
else:
raise ConnectorValidationError(
f"Unexpected BookStack error (status={e.status_code}): {e}"
) from e
except Exception as exc:
raise ConnectorValidationError(
f"Unexpected error while validating BookStack connector settings: {exc}"
) from exc

View File

@@ -1,4 +1,3 @@
import re
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
@@ -25,22 +24,16 @@ def datetime_to_utc(dt: datetime) -> datetime:
def time_str_to_utc(datetime_str: str) -> datetime:
# Remove all timezone abbreviations in parentheses
datetime_str = re.sub(r"\([A-Z]+\)", "", datetime_str).strip()
# Remove any remaining parentheses and their contents
datetime_str = re.sub(r"\(.*?\)", "", datetime_str).strip()
try:
dt = parse(datetime_str)
except ValueError:
# Fix common format issues (e.g. "0000" => "+0000")
# Handle malformed timezone by attempting to fix common format issues
if "0000" in datetime_str:
datetime_str = datetime_str.replace(" 0000", " +0000")
dt = parse(datetime_str)
# Convert "0000" to "+0000" for proper timezone parsing
fixed_dt_str = datetime_str.replace(" 0000", " +0000")
dt = parse(fixed_dt_str)
else:
raise
return datetime_to_utc(dt)

View File

@@ -195,15 +195,10 @@ def validate_ccpair_for_user(
db_session,
get_editable=False,
)
if not connector:
raise ValueError("Connector not found")
if connector.source == DocumentSource.INGESTION_API:
return
if not credential:
raise ValueError("Credential not found")
if not connector:
raise ValueError("Connector not found")
try:
runnable_connector = instantiate_connector(
@@ -215,6 +210,7 @@ def validate_ccpair_for_user(
tenant_id=tenant_id,
)
except Exception as e:
raise ConnectorValidationError(str(e))
error_msg = f"Unexpected error creating connector: {e}"
raise ConnectorValidationError(error_msg)
runnable_connector.validate_connector_settings()

View File

@@ -187,12 +187,12 @@ class FirefliesConnector(PollConnector, LoadConnector):
return self._process_transcripts()
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
self, start_unixtime: SecondsSinceUnixEpoch, end_unixtime: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%S.000Z"
)
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).strftime(
start_datetime = datetime.fromtimestamp(
start_unixtime, tz=timezone.utc
).strftime("%Y-%m-%dT%H:%M:%S.000Z")
end_datetime = datetime.fromtimestamp(end_unixtime, tz=timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%S.000Z"
)

View File

@@ -229,20 +229,16 @@ class GitbookConnector(LoadConnector, PollConnector):
try:
content = self.client.get(f"/spaces/{self.space_id}/content")
pages: list[dict[str, Any]] = content.get("pages", [])
pages = content.get("pages", [])
current_batch: list[Document] = []
for page in pages:
updated_at = datetime.fromisoformat(page["updatedAt"])
while pages:
page = pages.pop(0)
updated_at_raw = page.get("updatedAt")
if updated_at_raw is None:
# if updatedAt is not present, that means the page has never been edited
continue
updated_at = datetime.fromisoformat(updated_at_raw)
if start and updated_at < start:
continue
if current_batch:
yield current_batch
return
if end and updated_at > end:
continue
@@ -254,8 +250,6 @@ class GitbookConnector(LoadConnector, PollConnector):
yield current_batch
current_batch = []
pages.extend(page.get("pages", []))
if current_batch:
yield current_batch

View File

@@ -24,7 +24,6 @@ from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import UnexpectedError
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import Section
@@ -246,7 +245,7 @@ class GithubConnector(LoadConnector, PollConnector):
test_repo.get_contents("")
except RateLimitExceededException:
raise UnexpectedError(
raise ConnectorValidationError(
"Validation failed due to GitHub rate-limits being exceeded. Please try again later."
)

View File

@@ -297,7 +297,6 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnector):
userId=user_email,
fields=THREAD_LIST_FIELDS,
q=query,
continue_on_404_or_403=True,
):
full_threads = execute_paginated_retrieval(
retrieval_function=gmail_service.users().threads().get,

View File

@@ -25,12 +25,8 @@ from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
from onyx.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.connectors.interfaces import CredentialExpiredError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import InsufficientPermissionsError
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import UnexpectedError
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.file_processing.extract_file_text import read_pdf_file
@@ -41,8 +37,6 @@ from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20
class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
# Given a base site, index everything under that path
@@ -176,35 +170,26 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]:
def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
try:
response = requests.get(sitemap_url)
response.raise_for_status()
response = requests.get(sitemap_url)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
urls = [
_ensure_absolute_url(sitemap_url, loc_tag.text)
for loc_tag in soup.find_all("loc")
]
soup = BeautifulSoup(response.content, "html.parser")
urls = [
_ensure_absolute_url(sitemap_url, loc_tag.text)
for loc_tag in soup.find_all("loc")
]
if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
# the given url doesn't look like a sitemap, let's try to find one
urls = list_pages_for_site(sitemap_url)
if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
# the given url doesn't look like a sitemap, let's try to find one
urls = list_pages_for_site(sitemap_url)
if len(urls) == 0:
raise ValueError(
f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
)
return urls
except requests.RequestException as e:
raise RuntimeError(f"Failed to fetch sitemap from {sitemap_url}: {e}")
except ValueError as e:
raise RuntimeError(f"Error processing sitemap {sitemap_url}: {e}")
except Exception as e:
raise RuntimeError(
f"Unexpected error while processing sitemap {sitemap_url}: {e}"
if len(urls) == 0:
raise ValueError(
f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
)
return urls
def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
if not urlparse(maybe_relative_url).netloc:
@@ -240,14 +225,10 @@ class WebConnector(LoadConnector):
web_connector_type: str = WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,
mintlify_cleanup: bool = True, # Mostly ok to apply to other websites as well
batch_size: int = INDEX_BATCH_SIZE,
scroll_before_scraping: bool = False,
**kwargs: Any,
) -> None:
self.mintlify_cleanup = mintlify_cleanup
self.batch_size = batch_size
self.recursive = False
self.scroll_before_scraping = scroll_before_scraping
self.web_connector_type = web_connector_type
if web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value:
self.recursive = True
@@ -363,18 +344,6 @@ class WebConnector(LoadConnector):
continue
visited_links.add(current_url)
if self.scroll_before_scraping:
scroll_attempts = 0
previous_height = page.evaluate("document.body.scrollHeight")
while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:
page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
page.wait_for_load_state("networkidle", timeout=30000)
new_height = page.evaluate("document.body.scrollHeight")
if new_height == previous_height:
break # Stop scrolling when no more content is loaded
previous_height = new_height
scroll_attempts += 1
content = page.content()
soup = BeautifulSoup(content, "html.parser")
@@ -433,53 +402,6 @@ class WebConnector(LoadConnector):
raise RuntimeError(last_error)
raise RuntimeError("No valid pages found.")
def validate_connector_settings(self) -> None:
# Make sure we have at least one valid URL to check
if not self.to_visit_list:
raise ConnectorValidationError(
"No URL configured. Please provide at least one valid URL."
)
if self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value:
return None
# We'll just test the first URL for connectivity and correctness
test_url = self.to_visit_list[0]
# Check that the URL is allowed and well-formed
try:
protected_url_check(test_url)
except ValueError as e:
raise ConnectorValidationError(
f"Protected URL check failed for '{test_url}': {e}"
)
except ConnectionError as e:
# Typically DNS or other network issues
raise ConnectorValidationError(str(e))
# Make a quick request to see if we get a valid response
try:
check_internet_connection(test_url)
except Exception as e:
err_str = str(e)
if "401" in err_str:
raise CredentialExpiredError(
f"Unauthorized access to '{test_url}': {e}"
)
elif "403" in err_str:
raise InsufficientPermissionsError(
f"Forbidden access to '{test_url}': {e}"
)
elif "404" in err_str:
raise ConnectorValidationError(f"Page not found for '{test_url}': {e}")
elif "Max retries exceeded" in err_str and "NameResolutionError" in err_str:
raise ConnectorValidationError(
f"Unable to resolve hostname for '{test_url}'. Please check the URL and your internet connection."
)
else:
# Could be a 5xx or another error, treat as unexpected
raise UnexpectedError(f"Unexpected error validating '{test_url}': {e}")
if __name__ == "__main__":
connector = WebConnector("https://docs.onyx.app/")

View File

@@ -60,8 +60,9 @@ def count_documents_by_needs_sync(session: Session) -> int:
This function executes the query and returns the count of
documents matching the criteria."""
return (
session.query(DbDocument.id)
count = (
session.query(func.count(DbDocument.id.distinct()))
.select_from(DbDocument)
.join(
DocumentByConnectorCredentialPair,
DbDocument.id == DocumentByConnectorCredentialPair.id,
@@ -72,53 +73,63 @@ def count_documents_by_needs_sync(session: Session) -> int:
DbDocument.last_synced.is_(None),
)
)
.count()
.scalar()
)
return count
def construct_document_select_for_connector_credential_pair_by_needs_sync(
connector_id: int, credential_id: int
) -> Select:
return (
select(DbDocument)
.join(
DocumentByConnectorCredentialPair,
DbDocument.id == DocumentByConnectorCredentialPair.id,
)
.where(
and_(
DocumentByConnectorCredentialPair.connector_id == connector_id,
DocumentByConnectorCredentialPair.credential_id == credential_id,
or_(
DbDocument.last_modified > DbDocument.last_synced,
DbDocument.last_synced.is_(None),
),
)
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
and_(
DocumentByConnectorCredentialPair.connector_id == connector_id,
DocumentByConnectorCredentialPair.credential_id == credential_id,
)
)
stmt = (
select(DbDocument)
.where(
DbDocument.id.in_(initial_doc_ids_stmt),
or_(
DbDocument.last_modified
> DbDocument.last_synced, # last_modified is newer than last_synced
DbDocument.last_synced.is_(None), # never synced
),
)
.distinct()
)
return stmt
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
connector_id: int, credential_id: int
) -> Select:
return (
select(DbDocument.id)
.join(
DocumentByConnectorCredentialPair,
DbDocument.id == DocumentByConnectorCredentialPair.id,
)
.where(
and_(
DocumentByConnectorCredentialPair.connector_id == connector_id,
DocumentByConnectorCredentialPair.credential_id == credential_id,
or_(
DbDocument.last_modified > DbDocument.last_synced,
DbDocument.last_synced.is_(None),
),
)
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
and_(
DocumentByConnectorCredentialPair.connector_id == connector_id,
DocumentByConnectorCredentialPair.credential_id == credential_id,
)
)
stmt = (
select(DbDocument.id)
.where(
DbDocument.id.in_(initial_doc_ids_stmt),
or_(
DbDocument.last_modified
> DbDocument.last_synced, # last_modified is newer than last_synced
DbDocument.last_synced.is_(None), # never synced
),
)
.distinct()
)
return stmt
def get_all_documents_needing_vespa_sync_for_cc_pair(
db_session: Session, cc_pair_id: int

View File

@@ -570,14 +570,6 @@ class Document(Base):
back_populates="documents",
)
__table_args__ = (
Index(
"ix_document_sync_status",
last_modified,
last_synced,
),
)
class Tag(Base):
__tablename__ = "tag"

View File

@@ -23,7 +23,6 @@ class PreviousMessage(BaseModel):
message_type: MessageType
files: list[InMemoryChatFile]
tool_call: ToolCallFinalResult | None
refined_answer_improvement: bool | None
@classmethod
def from_chat_message(
@@ -48,7 +47,6 @@ class PreviousMessage(BaseModel):
)
if chat_message.tool_call
else None,
refined_answer_improvement=chat_message.refined_answer_improvement,
)
def to_langchain_msg(self) -> BaseMessage:

View File

@@ -665,8 +665,7 @@ def associate_credential_to_connector(
logger.error(f"IntegrityError: {e}")
raise HTTPException(status_code=400, detail="Name must be unique")
except Exception as e:
logger.exception(f"Unexpected error: {e}")
except Exception:
raise HTTPException(status_code=500, detail="Unexpected error")

View File

@@ -28,7 +28,6 @@ from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.connectors.google_utils.google_auth import (
get_google_oauth_creds,
)
@@ -62,7 +61,6 @@ from onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TO
from onyx.connectors.google_utils.shared_constants import (
GoogleOAuthAuthenticationMethod,
)
from onyx.connectors.interfaces import ConnectorValidationError
from onyx.db.connector import create_connector
from onyx.db.connector import delete_connector
from onyx.db.connector import fetch_connector_by_id
@@ -846,22 +844,11 @@ def create_connector_with_mock_credential(
db_session=db_session,
)
# Store the created connector and credential IDs
connector_id = cast(int, connector_response.id)
credential_id = credential.id
validate_ccpair_for_user(
connector_id=connector_id,
credential_id=credential_id,
db_session=db_session,
user=user,
tenant_id=tenant_id,
)
response = add_credential_to_connector(
db_session=db_session,
user=user,
connector_id=connector_id,
credential_id=credential_id,
connector_id=cast(int, connector_response.id), # will aways be an int
credential_id=credential.id,
access_type=connector_data.access_type,
cc_pair_name=connector_data.name,
groups=connector_data.groups,
@@ -886,12 +873,9 @@ def create_connector_with_mock_credential(
properties=None,
db_session=db_session,
)
return response
except ConnectorValidationError as e:
raise HTTPException(
status_code=400, detail="Connector validation error: " + str(e)
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

View File

@@ -311,23 +311,19 @@ def bulk_invite_users(
all_emails = list(set(new_invited_emails) | set(initial_invited_users))
number_of_invited_users = write_invited_users(all_emails)
# send out email invitations if enabled
if ENABLE_EMAIL_INVITES:
try:
for email in new_invited_emails:
send_user_email_invite(email, current_user, AUTH_TYPE)
except Exception as e:
logger.error(f"Error sending email invite to invited users: {e}")
if not MULTI_TENANT:
return number_of_invited_users
# for billing purposes, write to the control plane about the number of new users
try:
logger.info("Registering tenant users")
fetch_ee_implementation_or_noop(
"onyx.server.tenants.billing", "register_tenant_users", None
)(tenant_id, get_total_users_count(db_session))
if ENABLE_EMAIL_INVITES:
try:
for email in new_invited_emails:
send_user_email_invite(email, current_user)
except Exception as e:
logger.error(f"Error sending email invite to invited users: {e}")
return number_of_invited_users
except Exception as e:

View File

@@ -45,7 +45,7 @@ class Settings(BaseModel):
gpu_enabled: bool | None = None
application_status: ApplicationStatus = ApplicationStatus.ACTIVE
anonymous_user_enabled: bool | None = None
pro_search_enabled: bool | None = None
pro_search_disabled: bool | None = None
temperature_override_enabled: bool = False
auto_scroll: bool = False

View File

@@ -1,4 +1,3 @@
cohere==5.6.1
posthog==3.7.4
python3-saml==1.15.0
xmlsec==1.3.14

View File

@@ -1,44 +0,0 @@
import pytest
from onyx.connectors.models import Document
from onyx.connectors.web.connector import WEB_CONNECTOR_VALID_SETTINGS
from onyx.connectors.web.connector import WebConnector
# NOTE(rkuo): we will probably need to adjust this test to point at our own test site
# to avoid depending on a third party site
@pytest.fixture
def web_connector(request: pytest.FixtureRequest) -> WebConnector:
scroll_before_scraping = request.param
connector = WebConnector(
base_url="https://developer.onewelcome.com",
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
scroll_before_scraping=scroll_before_scraping,
)
return connector
@pytest.mark.parametrize("web_connector", [True], indirect=True)
def test_web_connector_scroll(web_connector: WebConnector) -> None:
all_docs: list[Document] = []
document_batches = web_connector.load_from_state()
for doc_batch in document_batches:
for doc in doc_batch:
all_docs.append(doc)
assert len(all_docs) == 1
doc = all_docs[0]
assert "Onegini Identity Cloud" in doc.sections[0].text
@pytest.mark.parametrize("web_connector", [False], indirect=True)
def test_web_connector_no_scroll(web_connector: WebConnector) -> None:
all_docs: list[Document] = []
document_batches = web_connector.load_from_state()
for doc_batch in document_batches:
for doc in doc_batch:
all_docs.append(doc)
assert len(all_docs) == 1
doc = all_docs[0]
assert "Onegini Identity Cloud" not in doc.sections[0].text

View File

@@ -4,24 +4,6 @@ log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" '
'"$http_user_agent" "$http_x_forwarded_for" '
'rt=$request_time';
# Map X-Forwarded-Proto or fallback to $scheme
map $http_x_forwarded_proto $forwarded_proto {
default $http_x_forwarded_proto;
"" $scheme;
}
# Map X-Forwarded-Host or fallback to $host
map $http_x_forwarded_host $forwarded_host {
default $http_x_forwarded_host;
"" $host;
}
# Map X-Forwarded-Port or fallback to server port
map $http_x_forwarded_port $forwarded_port {
default $http_x_forwarded_port;
"" $server_port;
}
upstream api_server {
# fail_timeout=0 means we always retry an upstream even if it failed
# to return a good HTTP response
@@ -39,7 +21,8 @@ upstream web_server {
}
server {
listen 80 default_server;
listen 80;
server_name ${DOMAIN};
client_max_body_size 5G; # Maximum upload size
@@ -53,9 +36,8 @@ server {
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $forwarded_proto;
proxy_set_header X-Forwarded-Host $forwarded_host;
proxy_set_header X-Forwarded-Port $forwarded_port;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
# need to use 1.1 to support chunked transfers
@@ -72,9 +54,8 @@ server {
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $forwarded_proto;
proxy_set_header X-Forwarded-Host $forwarded_host;
proxy_set_header X-Forwarded-Port $forwarded_port;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;
@@ -91,25 +72,14 @@ server {
}
server {
listen 443 ssl default_server;
listen 443 ssl;
server_name ${DOMAIN};
client_max_body_size 5G; # Maximum upload size
location / {
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# don't use forwarded schema, host, or port here - this is the entry point
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_buffering off;
# we don't want nginx trying to do something clever with
# redirects, we set the Host: header above already.
proxy_redirect off;
proxy_pass http://localhost:80;
}

View File

@@ -21,7 +21,8 @@ upstream web_server {
}
server {
listen 80 default_server;
listen 80;
server_name ${DOMAIN};
client_max_body_size 5G; # Maximum upload size
@@ -36,8 +37,7 @@ server {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
# need to use 1.1 to support chunked transfers
@@ -55,8 +55,7 @@ server {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;

View File

@@ -4,24 +4,6 @@ log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" '
'"$http_user_agent" "$http_x_forwarded_for" '
'rt=$request_time';
# Map X-Forwarded-Proto or fallback to $scheme
map $http_x_forwarded_proto $forwarded_proto {
default $http_x_forwarded_proto;
"" $scheme;
}
# Map X-Forwarded-Host or fallback to $host
map $http_x_forwarded_host $forwarded_host {
default $http_x_forwarded_host;
"" $host;
}
# Map X-Forwarded-Port or fallback to server port
map $http_x_forwarded_port $forwarded_port {
default $http_x_forwarded_port;
"" $server_port;
}
upstream api_server {
# fail_timeout=0 means we always retry an upstream even if it failed
# to return a good HTTP response
@@ -39,7 +21,8 @@ upstream web_server {
}
server {
listen 80 default_server;
listen 80;
server_name ${DOMAIN};
client_max_body_size 5G; # Maximum upload size
@@ -53,9 +36,8 @@ server {
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $forwarded_proto;
proxy_set_header X-Forwarded-Host $forwarded_host;
proxy_set_header X-Forwarded-Port $forwarded_port;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
# need to use 1.1 to support chunked transfers
@@ -72,9 +54,8 @@ server {
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $forwarded_proto;
proxy_set_header X-Forwarded-Host $forwarded_host;
proxy_set_header X-Forwarded-Port $forwarded_port;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;
@@ -87,25 +68,14 @@ server {
}
server {
listen 443 ssl default_server;
listen 443 ssl;
server_name ${DOMAIN};
client_max_body_size 5G; # Maximum upload size
location / {
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# don't use forwarded schema, host, or port here - this is the entry point
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_buffering off;
# we don't want nginx trying to do something clever with
# redirects, we set the Host: header above already.
proxy_redirect off;
proxy_pass http://localhost:80;
}

View File

@@ -1,5 +1,5 @@
# fill in the template
envsubst '$SSL_CERT_FILE_NAME $SSL_CERT_KEY_FILE_NAME' < "/etc/nginx/conf.d/$1" > /etc/nginx/conf.d/app.conf
envsubst '$DOMAIN $SSL_CERT_FILE_NAME $SSL_CERT_KEY_FILE_NAME' < "/etc/nginx/conf.d/$1" > /etc/nginx/conf.d/app.conf
# wait for the api_server to be ready
echo "Waiting for API server to boot up; this may take a minute or two..."

View File

@@ -240,11 +240,11 @@ export function SettingsForm() {
/>
<Checkbox
label="Agent Search"
sublabel="If set, users will be able to use Agent Search."
checked={settings.pro_search_enabled ?? true}
label="Pro Search Disabled"
sublabel="If set, users will not be able to use Pro Search."
checked={settings.pro_search_disabled ?? false}
onChange={(e) =>
handleToggleSettingsField("pro_search_enabled", e.target.checked)
handleToggleSettingsField("pro_search_disabled", e.target.checked)
}
/>

View File

@@ -10,7 +10,7 @@ export interface Settings {
notifications: Notification[];
needs_reindexing: boolean;
gpu_enabled: boolean;
pro_search_enabled: boolean | null;
pro_search_disabled: boolean | null;
application_status: ApplicationStatus;
auto_scroll: boolean;
temperature_override_enabled: boolean;

View File

@@ -23,7 +23,6 @@ import {
SubQuestionDetail,
constructSubQuestions,
DocumentsResponse,
AgenticMessageResponseIDInfo,
} from "./interfaces";
import Prism from "prismjs";
@@ -1281,8 +1280,6 @@ export function ChatPage({
let toolCall: ToolCallMetadata | null = null;
let isImprovement: boolean | undefined = undefined;
let isStreamingQuestions = true;
let includeAgentic = false;
let secondLevelMessageId: number | null = null;
let initialFetchDetails: null | {
user_message_id: number;
@@ -1339,7 +1336,7 @@ export function ChatPage({
searchParams.get(SEARCH_PARAM_NAMES.SYSTEM_PROMPT) || undefined,
useExistingUserMessage: isSeededChat,
useLanggraph:
settings?.settings.pro_search_enabled &&
!settings?.settings.pro_search_disabled &&
proSearchEnabled &&
retrievalEnabled,
});
@@ -1420,17 +1417,6 @@ export function ChatPage({
resetRegenerationState();
} else {
const { user_message_id, frozenMessageMap } = initialFetchDetails;
if (Object.hasOwn(packet, "agentic_message_ids")) {
const agenticMessageIds = (packet as AgenticMessageResponseIDInfo)
.agentic_message_ids;
const level1MessageId = agenticMessageIds.find(
(item) => item.level === 1
)?.message_id;
if (level1MessageId) {
secondLevelMessageId = level1MessageId;
includeAgentic = true;
}
}
setChatState((prevState) => {
if (prevState.get(chatSessionIdRef.current!) === "loading") {
@@ -1582,10 +1568,7 @@ export function ChatPage({
};
}
);
} else if (
Object.hasOwn(packet, "error") &&
(packet as any).error != null
) {
} else if (Object.hasOwn(packet, "error")) {
if (
sub_questions.length > 0 &&
sub_questions
@@ -1597,8 +1580,8 @@ export function ChatPage({
setAgenticGenerating(false);
setAlternativeGeneratingAssistant(null);
setSubmittedMessage("");
throw new Error((packet as StreamingError).error);
return;
// throw new Error((packet as StreamingError).error);
} else {
error = (packet as StreamingError).error;
stackTrace = (packet as StreamingError).stack_trace;
@@ -1681,19 +1664,6 @@ export function ChatPage({
second_level_generating: second_level_generating,
agentic_docs: agenticDocs,
},
...(includeAgentic
? [
{
messageId: secondLevelMessageId!,
message: second_level_answer,
type: "assistant" as const,
files: [],
toolCall: null,
parentMessageId:
initialFetchDetails.assistant_message_id!,
},
]
: []),
]);
}
}
@@ -2722,11 +2692,6 @@ export function ChatPage({
? messageHistory[i + 1]?.documents
: undefined;
const nextMessage =
messageHistory[i + 1]?.type === "assistant"
? messageHistory[i + 1]
: undefined;
return (
<div
className="text-text"
@@ -2755,10 +2720,7 @@ export function ChatPage({
selectedMessageForDocDisplay ==
secondLevelMessage?.messageId)
}
isImprovement={
message.isImprovement ||
nextMessage?.isImprovement
}
isImprovement={message.isImprovement}
secondLevelGenerating={
(message.second_level_generating &&
currentSessionChatState !==

View File

@@ -805,12 +805,13 @@ export function ChatInputBar({
)}
</div>
<div className="flex items-center my-auto">
{retrievalEnabled && settings?.settings.pro_search_enabled && (
<AgenticToggle
proSearchEnabled={proSearchEnabled}
setProSearchEnabled={setProSearchEnabled}
/>
)}
{retrievalEnabled &&
!settings?.settings.pro_search_disabled && (
<AgenticToggle
proSearchEnabled={proSearchEnabled}
setProSearchEnabled={setProSearchEnabled}
/>
)}
<button
id="onyx-chat-input-send-button"
className={`cursor-pointer ${

View File

@@ -155,15 +155,6 @@ export interface MessageResponseIDInfo {
reserved_assistant_message_id: number;
}
export interface AgentMessageIDInfo {
level: number;
message_id: number;
}
export interface AgenticMessageResponseIDInfo {
agentic_message_ids: AgentMessageIDInfo[];
}
export interface DocumentsResponse {
top_documents: OnyxDocument[];
rephrased_query: string | null;

View File

@@ -25,7 +25,6 @@ import {
RetrievalType,
StreamingError,
ToolCallMetadata,
AgenticMessageResponseIDInfo,
} from "./interfaces";
import { Persona } from "../admin/assistants/interfaces";
import { ReadonlyURLSearchParams } from "next/navigation";
@@ -155,8 +154,7 @@ export type PacketType =
| AgentAnswerPiece
| SubQuestionPiece
| ExtendedToolResponse
| RefinedAnswerImprovement
| AgenticMessageResponseIDInfo;
| RefinedAnswerImprovement;
export async function* sendMessage({
regenerate,

View File

@@ -21,9 +21,11 @@ import { fetchAssistantData } from "@/lib/chat/fetchAssistantdata";
import { AppProvider } from "@/components/context/AppProvider";
import { PHProvider } from "./providers";
import { getCurrentUserSS } from "@/lib/userSS";
import CardSection from "@/components/admin/CardSection";
import { Suspense } from "react";
import PostHogPageView from "./PostHogPageView";
import Script from "next/script";
import { LogoType } from "@/components/logo/Logo";
import { Hanken_Grotesk } from "next/font/google";
import { WebVitals } from "./web-vitals";
import { ThemeProvider } from "next-themes";

View File

@@ -51,7 +51,7 @@ export async function fetchSettingsSS(): Promise<CombinedSettings | null> {
notifications: [],
needs_reindexing: false,
anonymous_user_enabled: false,
pro_search_enabled: true,
pro_search_disabled: false,
temperature_override_enabled: true,
};
} else {
@@ -95,8 +95,8 @@ export async function fetchSettingsSS(): Promise<CombinedSettings | null> {
}
}
if (settings.pro_search_enabled == null) {
settings.pro_search_enabled = true;
if (enterpriseSettings && settings.pro_search_disabled == null) {
settings.pro_search_disabled = true;
}
const webVersion = getWebVersion();

View File

@@ -152,17 +152,7 @@ export const connectorConfigs: Record<
],
},
],
advanced_values: [
{
type: "checkbox",
query: "Scroll before scraping:",
label: "Scroll before scraping",
description:
"Enable if the website requires scrolling for the desired content to load",
name: "scroll_before_scraping",
optional: true,
},
],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24,
},
github: {