mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-02 06:05:46 +00:00
Compare commits
1 Commits
default_ee
...
connector_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8ea54e1f52 |
@@ -74,9 +74,7 @@ jobs:
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
playwright install chromium
|
||||
playwright install-deps chromium
|
||||
|
||||
|
||||
- name: Run Tests
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""remove inactive ccpair status on downgrade
|
||||
|
||||
Revision ID: acaab4ef4507
|
||||
Revises: b388730a2899
|
||||
Revises: b7a7eee5aa15
|
||||
Create Date: 2025-02-16 18:21:41.330212
|
||||
|
||||
"""
|
||||
@@ -12,7 +12,7 @@ from sqlalchemy import update
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "acaab4ef4507"
|
||||
down_revision = "b388730a2899"
|
||||
down_revision = "b7a7eee5aa15"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
"""Add composite index for last_modified and last_synced to document
|
||||
|
||||
Revision ID: f13db29f3101
|
||||
Revises: b388730a2899
|
||||
Create Date: 2025-02-18 22:48:11.511389
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "f13db29f3101"
|
||||
down_revision = "acaab4ef4507"
|
||||
branch_labels: str | None = None
|
||||
depends_on: str | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_index(
|
||||
"ix_document_sync_status",
|
||||
"document",
|
||||
["last_modified", "last_synced"],
|
||||
unique=False,
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("ix_document_sync_status", table_name="document")
|
||||
@@ -10,7 +10,6 @@ from onyx.configs.app_configs import SMTP_PORT
|
||||
from onyx.configs.app_configs import SMTP_SERVER
|
||||
from onyx.configs.app_configs import SMTP_USER
|
||||
from onyx.configs.app_configs import WEB_DOMAIN
|
||||
from onyx.configs.constants import AuthType
|
||||
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
|
||||
from onyx.db.models import User
|
||||
|
||||
@@ -188,51 +187,23 @@ def send_subscription_cancellation_email(user_email: str) -> None:
|
||||
send_email(user_email, subject, html_content, text_content)
|
||||
|
||||
|
||||
def send_user_email_invite(
|
||||
user_email: str, current_user: User, auth_type: AuthType
|
||||
) -> None:
|
||||
def send_user_email_invite(user_email: str, current_user: User) -> None:
|
||||
subject = "Invitation to Join Onyx Organization"
|
||||
heading = "You've Been Invited!"
|
||||
|
||||
# the exact action taken by the user, and thus the message, depends on the auth type
|
||||
message = f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
|
||||
if auth_type == AuthType.CLOUD:
|
||||
message += (
|
||||
"<p>To join the organization, please click the button below to set a password "
|
||||
"or login with Google and complete your registration.</p>"
|
||||
)
|
||||
elif auth_type == AuthType.BASIC:
|
||||
message += (
|
||||
"<p>To join the organization, please click the button below to set a password "
|
||||
"and complete your registration.</p>"
|
||||
)
|
||||
elif auth_type == AuthType.GOOGLE_OAUTH:
|
||||
message += (
|
||||
"<p>To join the organization, please click the button below to login with Google "
|
||||
"and complete your registration.</p>"
|
||||
)
|
||||
elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:
|
||||
message += (
|
||||
"<p>To join the organization, please click the button below to"
|
||||
" complete your registration.</p>"
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Invalid auth type: {auth_type}")
|
||||
|
||||
message = (
|
||||
f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
|
||||
"<p>To join the organization, please click the button below to set a password "
|
||||
"or login with Google and complete your registration.</p>"
|
||||
)
|
||||
cta_text = "Join Organization"
|
||||
cta_link = f"{WEB_DOMAIN}/auth/signup?email={user_email}"
|
||||
html_content = build_html_email(heading, message, cta_text, cta_link)
|
||||
|
||||
# text content is the fallback for clients that don't support HTML
|
||||
# not as critical, so not having special cases for each auth type
|
||||
text_content = (
|
||||
f"You have been invited by {current_user.email} to join an organization on Onyx.\n"
|
||||
"To join the organization, please visit the following link:\n"
|
||||
f"{WEB_DOMAIN}/auth/signup?email={user_email}\n"
|
||||
"You'll be asked to set a password or login with Google to complete your registration."
|
||||
)
|
||||
if auth_type == AuthType.CLOUD:
|
||||
text_content += "You'll be asked to set a password or login with Google to complete your registration."
|
||||
|
||||
send_email(user_email, subject, html_content, text_content)
|
||||
|
||||
|
||||
|
||||
@@ -95,7 +95,6 @@ from onyx.db.models import User
|
||||
from onyx.db.users import get_user_by_email
|
||||
from onyx.redis.redis_pool import get_async_redis_connection
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.server.utils import BasicAuthenticationError
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.telemetry import create_milestone_and_report
|
||||
from onyx.utils.telemetry import optional_telemetry
|
||||
@@ -110,6 +109,11 @@ from shared_configs.contextvars import get_current_tenant_id
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class BasicAuthenticationError(HTTPException):
|
||||
def __init__(self, detail: str):
|
||||
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
|
||||
|
||||
|
||||
def is_user_admin(user: User | None) -> bool:
|
||||
if AUTH_TYPE == AuthType.DISABLED:
|
||||
return True
|
||||
|
||||
@@ -140,7 +140,7 @@ def on_task_postrun(
|
||||
f"{f'for tenant_id={tenant_id}' if tenant_id else ''}"
|
||||
)
|
||||
|
||||
r = get_redis_client(tenant_id=tenant_id)
|
||||
r = get_redis_client()
|
||||
|
||||
if task_id.startswith(RedisConnectorCredentialPair.PREFIX):
|
||||
r.srem(RedisConnectorCredentialPair.get_taskset_key(), task_id)
|
||||
|
||||
@@ -190,8 +190,7 @@ def create_chat_chain(
|
||||
and previous_message.message_type == MessageType.ASSISTANT
|
||||
and mainline_messages
|
||||
):
|
||||
if current_message.refined_answer_improvement:
|
||||
mainline_messages[-1] = current_message
|
||||
mainline_messages[-1] = current_message
|
||||
else:
|
||||
mainline_messages.append(current_message)
|
||||
|
||||
|
||||
@@ -142,15 +142,6 @@ class MessageResponseIDInfo(BaseModel):
|
||||
reserved_assistant_message_id: int
|
||||
|
||||
|
||||
class AgentMessageIDInfo(BaseModel):
|
||||
level: int
|
||||
message_id: int
|
||||
|
||||
|
||||
class AgenticMessageResponseIDInfo(BaseModel):
|
||||
agentic_message_ids: list[AgentMessageIDInfo]
|
||||
|
||||
|
||||
class StreamingError(BaseModel):
|
||||
error: str
|
||||
stack_trace: str | None = None
|
||||
|
||||
@@ -11,8 +11,6 @@ from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallExcep
|
||||
from onyx.chat.answer import Answer
|
||||
from onyx.chat.chat_utils import create_chat_chain
|
||||
from onyx.chat.chat_utils import create_temporary_persona
|
||||
from onyx.chat.models import AgenticMessageResponseIDInfo
|
||||
from onyx.chat.models import AgentMessageIDInfo
|
||||
from onyx.chat.models import AgentSearchPacket
|
||||
from onyx.chat.models import AllCitations
|
||||
from onyx.chat.models import AnswerPostInfo
|
||||
@@ -310,7 +308,6 @@ ChatPacket = (
|
||||
| CustomToolResponse
|
||||
| MessageSpecificCitations
|
||||
| MessageResponseIDInfo
|
||||
| AgenticMessageResponseIDInfo
|
||||
| StreamStopInfo
|
||||
| AgentSearchPacket
|
||||
)
|
||||
@@ -1038,7 +1035,6 @@ def stream_chat_message_objects(
|
||||
next_level = 1
|
||||
prev_message = gen_ai_response_message
|
||||
agent_answers = answer.llm_answer_by_level()
|
||||
agentic_message_ids = []
|
||||
while next_level in agent_answers:
|
||||
next_answer = agent_answers[next_level]
|
||||
info = info_by_subq[
|
||||
@@ -1063,18 +1059,17 @@ def stream_chat_message_objects(
|
||||
refined_answer_improvement=refined_answer_improvement,
|
||||
is_agentic=True,
|
||||
)
|
||||
agentic_message_ids.append(
|
||||
AgentMessageIDInfo(level=next_level, message_id=next_answer_message.id)
|
||||
)
|
||||
next_level += 1
|
||||
prev_message = next_answer_message
|
||||
|
||||
logger.debug("Committing messages")
|
||||
db_session.commit() # actually save user / assistant message
|
||||
|
||||
yield AgenticMessageResponseIDInfo(agentic_message_ids=agentic_message_ids)
|
||||
msg_detail_response = translate_db_message_to_chat_message_detail(
|
||||
gen_ai_response_message
|
||||
)
|
||||
|
||||
yield translate_db_message_to_chat_message_detail(gen_ai_response_message)
|
||||
yield msg_detail_response
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
logger.exception(error_msg)
|
||||
|
||||
@@ -5,8 +5,6 @@ import requests
|
||||
|
||||
class BookStackClientRequestFailedError(ConnectionError):
|
||||
def __init__(self, status: int, error: str) -> None:
|
||||
self.status_code = status
|
||||
self.error = error
|
||||
super().__init__(
|
||||
"BookStack Client request failed with status {status}: {error}".format(
|
||||
status=status, error=error
|
||||
|
||||
@@ -7,12 +7,8 @@ from typing import Any
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.bookstack.client import BookStackApiClient
|
||||
from onyx.connectors.bookstack.client import BookStackClientRequestFailedError
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
@@ -218,39 +214,3 @@ class BookstackConnector(LoadConnector, PollConnector):
|
||||
break
|
||||
else:
|
||||
time.sleep(0.2)
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""
|
||||
Validate that the BookStack credentials and connector settings are correct.
|
||||
Specifically checks that we can make an authenticated request to BookStack.
|
||||
"""
|
||||
if not self.bookstack_client:
|
||||
raise ConnectorMissingCredentialError(
|
||||
"BookStack credentials have not been loaded."
|
||||
)
|
||||
|
||||
try:
|
||||
# Attempt to fetch a small batch of books (arbitrary endpoint) to verify credentials
|
||||
_ = self.bookstack_client.get(
|
||||
"/books", params={"count": "1", "offset": "0"}
|
||||
)
|
||||
|
||||
except BookStackClientRequestFailedError as e:
|
||||
# Check for HTTP status codes
|
||||
if e.status_code == 401:
|
||||
raise CredentialExpiredError(
|
||||
"Your BookStack credentials appear to be invalid or expired (HTTP 401)."
|
||||
) from e
|
||||
elif e.status_code == 403:
|
||||
raise InsufficientPermissionsError(
|
||||
"The configured BookStack token does not have sufficient permissions (HTTP 403)."
|
||||
) from e
|
||||
else:
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected BookStack error (status={e.status_code}): {e}"
|
||||
) from e
|
||||
|
||||
except Exception as exc:
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected error while validating BookStack connector settings: {exc}"
|
||||
) from exc
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
@@ -25,22 +24,16 @@ def datetime_to_utc(dt: datetime) -> datetime:
|
||||
|
||||
|
||||
def time_str_to_utc(datetime_str: str) -> datetime:
|
||||
# Remove all timezone abbreviations in parentheses
|
||||
datetime_str = re.sub(r"\([A-Z]+\)", "", datetime_str).strip()
|
||||
|
||||
# Remove any remaining parentheses and their contents
|
||||
datetime_str = re.sub(r"\(.*?\)", "", datetime_str).strip()
|
||||
|
||||
try:
|
||||
dt = parse(datetime_str)
|
||||
except ValueError:
|
||||
# Fix common format issues (e.g. "0000" => "+0000")
|
||||
# Handle malformed timezone by attempting to fix common format issues
|
||||
if "0000" in datetime_str:
|
||||
datetime_str = datetime_str.replace(" 0000", " +0000")
|
||||
dt = parse(datetime_str)
|
||||
# Convert "0000" to "+0000" for proper timezone parsing
|
||||
fixed_dt_str = datetime_str.replace(" 0000", " +0000")
|
||||
dt = parse(fixed_dt_str)
|
||||
else:
|
||||
raise
|
||||
|
||||
return datetime_to_utc(dt)
|
||||
|
||||
|
||||
|
||||
@@ -195,15 +195,10 @@ def validate_ccpair_for_user(
|
||||
db_session,
|
||||
get_editable=False,
|
||||
)
|
||||
|
||||
if not connector:
|
||||
raise ValueError("Connector not found")
|
||||
|
||||
if connector.source == DocumentSource.INGESTION_API:
|
||||
return
|
||||
|
||||
if not credential:
|
||||
raise ValueError("Credential not found")
|
||||
if not connector:
|
||||
raise ValueError("Connector not found")
|
||||
|
||||
try:
|
||||
runnable_connector = instantiate_connector(
|
||||
@@ -215,6 +210,7 @@ def validate_ccpair_for_user(
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
except Exception as e:
|
||||
raise ConnectorValidationError(str(e))
|
||||
error_msg = f"Unexpected error creating connector: {e}"
|
||||
raise ConnectorValidationError(error_msg)
|
||||
|
||||
runnable_connector.validate_connector_settings()
|
||||
|
||||
@@ -187,12 +187,12 @@ class FirefliesConnector(PollConnector, LoadConnector):
|
||||
return self._process_transcripts()
|
||||
|
||||
def poll_source(
|
||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||
self, start_unixtime: SecondsSinceUnixEpoch, end_unixtime: SecondsSinceUnixEpoch
|
||||
) -> GenerateDocumentsOutput:
|
||||
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
|
||||
"%Y-%m-%dT%H:%M:%S.000Z"
|
||||
)
|
||||
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).strftime(
|
||||
start_datetime = datetime.fromtimestamp(
|
||||
start_unixtime, tz=timezone.utc
|
||||
).strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||
end_datetime = datetime.fromtimestamp(end_unixtime, tz=timezone.utc).strftime(
|
||||
"%Y-%m-%dT%H:%M:%S.000Z"
|
||||
)
|
||||
|
||||
|
||||
@@ -229,20 +229,16 @@ class GitbookConnector(LoadConnector, PollConnector):
|
||||
|
||||
try:
|
||||
content = self.client.get(f"/spaces/{self.space_id}/content")
|
||||
pages: list[dict[str, Any]] = content.get("pages", [])
|
||||
pages = content.get("pages", [])
|
||||
|
||||
current_batch: list[Document] = []
|
||||
for page in pages:
|
||||
updated_at = datetime.fromisoformat(page["updatedAt"])
|
||||
|
||||
while pages:
|
||||
page = pages.pop(0)
|
||||
|
||||
updated_at_raw = page.get("updatedAt")
|
||||
if updated_at_raw is None:
|
||||
# if updatedAt is not present, that means the page has never been edited
|
||||
continue
|
||||
|
||||
updated_at = datetime.fromisoformat(updated_at_raw)
|
||||
if start and updated_at < start:
|
||||
continue
|
||||
if current_batch:
|
||||
yield current_batch
|
||||
return
|
||||
if end and updated_at > end:
|
||||
continue
|
||||
|
||||
@@ -254,8 +250,6 @@ class GitbookConnector(LoadConnector, PollConnector):
|
||||
yield current_batch
|
||||
current_batch = []
|
||||
|
||||
pages.extend(page.get("pages", []))
|
||||
|
||||
if current_batch:
|
||||
yield current_batch
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import Section
|
||||
@@ -246,7 +245,7 @@ class GithubConnector(LoadConnector, PollConnector):
|
||||
test_repo.get_contents("")
|
||||
|
||||
except RateLimitExceededException:
|
||||
raise UnexpectedError(
|
||||
raise ConnectorValidationError(
|
||||
"Validation failed due to GitHub rate-limits being exceeded. Please try again later."
|
||||
)
|
||||
|
||||
|
||||
@@ -297,7 +297,6 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnector):
|
||||
userId=user_email,
|
||||
fields=THREAD_LIST_FIELDS,
|
||||
q=query,
|
||||
continue_on_404_or_403=True,
|
||||
):
|
||||
full_threads = execute_paginated_retrieval(
|
||||
retrieval_function=gmail_service.users().threads().get,
|
||||
|
||||
@@ -25,12 +25,8 @@ from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
|
||||
from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
|
||||
from onyx.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import CredentialExpiredError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import InsufficientPermissionsError
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import UnexpectedError
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.file_processing.extract_file_text import read_pdf_file
|
||||
@@ -41,8 +37,6 @@ from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20
|
||||
|
||||
|
||||
class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
|
||||
# Given a base site, index everything under that path
|
||||
@@ -176,35 +170,26 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]:
|
||||
|
||||
|
||||
def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
|
||||
try:
|
||||
response = requests.get(sitemap_url)
|
||||
response.raise_for_status()
|
||||
response = requests.get(sitemap_url)
|
||||
response.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
urls = [
|
||||
_ensure_absolute_url(sitemap_url, loc_tag.text)
|
||||
for loc_tag in soup.find_all("loc")
|
||||
]
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
urls = [
|
||||
_ensure_absolute_url(sitemap_url, loc_tag.text)
|
||||
for loc_tag in soup.find_all("loc")
|
||||
]
|
||||
|
||||
if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
|
||||
# the given url doesn't look like a sitemap, let's try to find one
|
||||
urls = list_pages_for_site(sitemap_url)
|
||||
if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
|
||||
# the given url doesn't look like a sitemap, let's try to find one
|
||||
urls = list_pages_for_site(sitemap_url)
|
||||
|
||||
if len(urls) == 0:
|
||||
raise ValueError(
|
||||
f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
|
||||
)
|
||||
|
||||
return urls
|
||||
except requests.RequestException as e:
|
||||
raise RuntimeError(f"Failed to fetch sitemap from {sitemap_url}: {e}")
|
||||
except ValueError as e:
|
||||
raise RuntimeError(f"Error processing sitemap {sitemap_url}: {e}")
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
f"Unexpected error while processing sitemap {sitemap_url}: {e}"
|
||||
if len(urls) == 0:
|
||||
raise ValueError(
|
||||
f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
|
||||
)
|
||||
|
||||
return urls
|
||||
|
||||
|
||||
def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
|
||||
if not urlparse(maybe_relative_url).netloc:
|
||||
@@ -240,14 +225,10 @@ class WebConnector(LoadConnector):
|
||||
web_connector_type: str = WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,
|
||||
mintlify_cleanup: bool = True, # Mostly ok to apply to other websites as well
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
scroll_before_scraping: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
self.mintlify_cleanup = mintlify_cleanup
|
||||
self.batch_size = batch_size
|
||||
self.recursive = False
|
||||
self.scroll_before_scraping = scroll_before_scraping
|
||||
self.web_connector_type = web_connector_type
|
||||
|
||||
if web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value:
|
||||
self.recursive = True
|
||||
@@ -363,18 +344,6 @@ class WebConnector(LoadConnector):
|
||||
continue
|
||||
visited_links.add(current_url)
|
||||
|
||||
if self.scroll_before_scraping:
|
||||
scroll_attempts = 0
|
||||
previous_height = page.evaluate("document.body.scrollHeight")
|
||||
while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:
|
||||
page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||
page.wait_for_load_state("networkidle", timeout=30000)
|
||||
new_height = page.evaluate("document.body.scrollHeight")
|
||||
if new_height == previous_height:
|
||||
break # Stop scrolling when no more content is loaded
|
||||
previous_height = new_height
|
||||
scroll_attempts += 1
|
||||
|
||||
content = page.content()
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
|
||||
@@ -433,53 +402,6 @@ class WebConnector(LoadConnector):
|
||||
raise RuntimeError(last_error)
|
||||
raise RuntimeError("No valid pages found.")
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
# Make sure we have at least one valid URL to check
|
||||
if not self.to_visit_list:
|
||||
raise ConnectorValidationError(
|
||||
"No URL configured. Please provide at least one valid URL."
|
||||
)
|
||||
|
||||
if self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value:
|
||||
return None
|
||||
|
||||
# We'll just test the first URL for connectivity and correctness
|
||||
test_url = self.to_visit_list[0]
|
||||
|
||||
# Check that the URL is allowed and well-formed
|
||||
try:
|
||||
protected_url_check(test_url)
|
||||
except ValueError as e:
|
||||
raise ConnectorValidationError(
|
||||
f"Protected URL check failed for '{test_url}': {e}"
|
||||
)
|
||||
except ConnectionError as e:
|
||||
# Typically DNS or other network issues
|
||||
raise ConnectorValidationError(str(e))
|
||||
|
||||
# Make a quick request to see if we get a valid response
|
||||
try:
|
||||
check_internet_connection(test_url)
|
||||
except Exception as e:
|
||||
err_str = str(e)
|
||||
if "401" in err_str:
|
||||
raise CredentialExpiredError(
|
||||
f"Unauthorized access to '{test_url}': {e}"
|
||||
)
|
||||
elif "403" in err_str:
|
||||
raise InsufficientPermissionsError(
|
||||
f"Forbidden access to '{test_url}': {e}"
|
||||
)
|
||||
elif "404" in err_str:
|
||||
raise ConnectorValidationError(f"Page not found for '{test_url}': {e}")
|
||||
elif "Max retries exceeded" in err_str and "NameResolutionError" in err_str:
|
||||
raise ConnectorValidationError(
|
||||
f"Unable to resolve hostname for '{test_url}'. Please check the URL and your internet connection."
|
||||
)
|
||||
else:
|
||||
# Could be a 5xx or another error, treat as unexpected
|
||||
raise UnexpectedError(f"Unexpected error validating '{test_url}': {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
connector = WebConnector("https://docs.onyx.app/")
|
||||
|
||||
@@ -60,8 +60,9 @@ def count_documents_by_needs_sync(session: Session) -> int:
|
||||
This function executes the query and returns the count of
|
||||
documents matching the criteria."""
|
||||
|
||||
return (
|
||||
session.query(DbDocument.id)
|
||||
count = (
|
||||
session.query(func.count(DbDocument.id.distinct()))
|
||||
.select_from(DbDocument)
|
||||
.join(
|
||||
DocumentByConnectorCredentialPair,
|
||||
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||
@@ -72,53 +73,63 @@ def count_documents_by_needs_sync(session: Session) -> int:
|
||||
DbDocument.last_synced.is_(None),
|
||||
)
|
||||
)
|
||||
.count()
|
||||
.scalar()
|
||||
)
|
||||
|
||||
return count
|
||||
|
||||
|
||||
def construct_document_select_for_connector_credential_pair_by_needs_sync(
|
||||
connector_id: int, credential_id: int
|
||||
) -> Select:
|
||||
return (
|
||||
select(DbDocument)
|
||||
.join(
|
||||
DocumentByConnectorCredentialPair,
|
||||
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||
)
|
||||
.where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
or_(
|
||||
DbDocument.last_modified > DbDocument.last_synced,
|
||||
DbDocument.last_synced.is_(None),
|
||||
),
|
||||
)
|
||||
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
)
|
||||
)
|
||||
|
||||
stmt = (
|
||||
select(DbDocument)
|
||||
.where(
|
||||
DbDocument.id.in_(initial_doc_ids_stmt),
|
||||
or_(
|
||||
DbDocument.last_modified
|
||||
> DbDocument.last_synced, # last_modified is newer than last_synced
|
||||
DbDocument.last_synced.is_(None), # never synced
|
||||
),
|
||||
)
|
||||
.distinct()
|
||||
)
|
||||
|
||||
return stmt
|
||||
|
||||
|
||||
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
|
||||
connector_id: int, credential_id: int
|
||||
) -> Select:
|
||||
return (
|
||||
select(DbDocument.id)
|
||||
.join(
|
||||
DocumentByConnectorCredentialPair,
|
||||
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||
)
|
||||
.where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
or_(
|
||||
DbDocument.last_modified > DbDocument.last_synced,
|
||||
DbDocument.last_synced.is_(None),
|
||||
),
|
||||
)
|
||||
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||
)
|
||||
)
|
||||
|
||||
stmt = (
|
||||
select(DbDocument.id)
|
||||
.where(
|
||||
DbDocument.id.in_(initial_doc_ids_stmt),
|
||||
or_(
|
||||
DbDocument.last_modified
|
||||
> DbDocument.last_synced, # last_modified is newer than last_synced
|
||||
DbDocument.last_synced.is_(None), # never synced
|
||||
),
|
||||
)
|
||||
.distinct()
|
||||
)
|
||||
|
||||
return stmt
|
||||
|
||||
|
||||
def get_all_documents_needing_vespa_sync_for_cc_pair(
|
||||
db_session: Session, cc_pair_id: int
|
||||
|
||||
@@ -570,14 +570,6 @@ class Document(Base):
|
||||
back_populates="documents",
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index(
|
||||
"ix_document_sync_status",
|
||||
last_modified,
|
||||
last_synced,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class Tag(Base):
|
||||
__tablename__ = "tag"
|
||||
|
||||
@@ -23,7 +23,6 @@ class PreviousMessage(BaseModel):
|
||||
message_type: MessageType
|
||||
files: list[InMemoryChatFile]
|
||||
tool_call: ToolCallFinalResult | None
|
||||
refined_answer_improvement: bool | None
|
||||
|
||||
@classmethod
|
||||
def from_chat_message(
|
||||
@@ -48,7 +47,6 @@ class PreviousMessage(BaseModel):
|
||||
)
|
||||
if chat_message.tool_call
|
||||
else None,
|
||||
refined_answer_improvement=chat_message.refined_answer_improvement,
|
||||
)
|
||||
|
||||
def to_langchain_msg(self) -> BaseMessage:
|
||||
|
||||
@@ -665,8 +665,7 @@ def associate_credential_to_connector(
|
||||
logger.error(f"IntegrityError: {e}")
|
||||
raise HTTPException(status_code=400, detail="Name must be unique")
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Unexpected error: {e}")
|
||||
except Exception:
|
||||
raise HTTPException(status_code=500, detail="Unexpected error")
|
||||
|
||||
|
||||
|
||||
@@ -28,7 +28,6 @@ from onyx.configs.constants import FileOrigin
|
||||
from onyx.configs.constants import MilestoneRecordType
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.connectors.factory import validate_ccpair_for_user
|
||||
from onyx.connectors.google_utils.google_auth import (
|
||||
get_google_oauth_creds,
|
||||
)
|
||||
@@ -62,7 +61,6 @@ from onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TO
|
||||
from onyx.connectors.google_utils.shared_constants import (
|
||||
GoogleOAuthAuthenticationMethod,
|
||||
)
|
||||
from onyx.connectors.interfaces import ConnectorValidationError
|
||||
from onyx.db.connector import create_connector
|
||||
from onyx.db.connector import delete_connector
|
||||
from onyx.db.connector import fetch_connector_by_id
|
||||
@@ -846,22 +844,11 @@ def create_connector_with_mock_credential(
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
# Store the created connector and credential IDs
|
||||
connector_id = cast(int, connector_response.id)
|
||||
credential_id = credential.id
|
||||
|
||||
validate_ccpair_for_user(
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
response = add_credential_to_connector(
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
connector_id=cast(int, connector_response.id), # will aways be an int
|
||||
credential_id=credential.id,
|
||||
access_type=connector_data.access_type,
|
||||
cc_pair_name=connector_data.name,
|
||||
groups=connector_data.groups,
|
||||
@@ -886,12 +873,9 @@ def create_connector_with_mock_credential(
|
||||
properties=None,
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
except ConnectorValidationError as e:
|
||||
raise HTTPException(
|
||||
status_code=400, detail="Connector validation error: " + str(e)
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
|
||||
@@ -311,23 +311,19 @@ def bulk_invite_users(
|
||||
all_emails = list(set(new_invited_emails) | set(initial_invited_users))
|
||||
number_of_invited_users = write_invited_users(all_emails)
|
||||
|
||||
# send out email invitations if enabled
|
||||
if ENABLE_EMAIL_INVITES:
|
||||
try:
|
||||
for email in new_invited_emails:
|
||||
send_user_email_invite(email, current_user, AUTH_TYPE)
|
||||
except Exception as e:
|
||||
logger.error(f"Error sending email invite to invited users: {e}")
|
||||
|
||||
if not MULTI_TENANT:
|
||||
return number_of_invited_users
|
||||
|
||||
# for billing purposes, write to the control plane about the number of new users
|
||||
try:
|
||||
logger.info("Registering tenant users")
|
||||
fetch_ee_implementation_or_noop(
|
||||
"onyx.server.tenants.billing", "register_tenant_users", None
|
||||
)(tenant_id, get_total_users_count(db_session))
|
||||
if ENABLE_EMAIL_INVITES:
|
||||
try:
|
||||
for email in new_invited_emails:
|
||||
send_user_email_invite(email, current_user)
|
||||
except Exception as e:
|
||||
logger.error(f"Error sending email invite to invited users: {e}")
|
||||
|
||||
return number_of_invited_users
|
||||
except Exception as e:
|
||||
|
||||
@@ -45,7 +45,7 @@ class Settings(BaseModel):
|
||||
gpu_enabled: bool | None = None
|
||||
application_status: ApplicationStatus = ApplicationStatus.ACTIVE
|
||||
anonymous_user_enabled: bool | None = None
|
||||
pro_search_enabled: bool | None = None
|
||||
pro_search_disabled: bool | None = None
|
||||
|
||||
temperature_override_enabled: bool = False
|
||||
auto_scroll: bool = False
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
cohere==5.6.1
|
||||
posthog==3.7.4
|
||||
python3-saml==1.15.0
|
||||
xmlsec==1.3.14
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.web.connector import WEB_CONNECTOR_VALID_SETTINGS
|
||||
from onyx.connectors.web.connector import WebConnector
|
||||
|
||||
|
||||
# NOTE(rkuo): we will probably need to adjust this test to point at our own test site
|
||||
# to avoid depending on a third party site
|
||||
@pytest.fixture
|
||||
def web_connector(request: pytest.FixtureRequest) -> WebConnector:
|
||||
scroll_before_scraping = request.param
|
||||
connector = WebConnector(
|
||||
base_url="https://developer.onewelcome.com",
|
||||
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
|
||||
scroll_before_scraping=scroll_before_scraping,
|
||||
)
|
||||
return connector
|
||||
|
||||
|
||||
@pytest.mark.parametrize("web_connector", [True], indirect=True)
|
||||
def test_web_connector_scroll(web_connector: WebConnector) -> None:
|
||||
all_docs: list[Document] = []
|
||||
document_batches = web_connector.load_from_state()
|
||||
for doc_batch in document_batches:
|
||||
for doc in doc_batch:
|
||||
all_docs.append(doc)
|
||||
|
||||
assert len(all_docs) == 1
|
||||
doc = all_docs[0]
|
||||
assert "Onegini Identity Cloud" in doc.sections[0].text
|
||||
|
||||
|
||||
@pytest.mark.parametrize("web_connector", [False], indirect=True)
|
||||
def test_web_connector_no_scroll(web_connector: WebConnector) -> None:
|
||||
all_docs: list[Document] = []
|
||||
document_batches = web_connector.load_from_state()
|
||||
for doc_batch in document_batches:
|
||||
for doc in doc_batch:
|
||||
all_docs.append(doc)
|
||||
|
||||
assert len(all_docs) == 1
|
||||
doc = all_docs[0]
|
||||
assert "Onegini Identity Cloud" not in doc.sections[0].text
|
||||
@@ -4,24 +4,6 @@ log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
'"$http_user_agent" "$http_x_forwarded_for" '
|
||||
'rt=$request_time';
|
||||
|
||||
# Map X-Forwarded-Proto or fallback to $scheme
|
||||
map $http_x_forwarded_proto $forwarded_proto {
|
||||
default $http_x_forwarded_proto;
|
||||
"" $scheme;
|
||||
}
|
||||
|
||||
# Map X-Forwarded-Host or fallback to $host
|
||||
map $http_x_forwarded_host $forwarded_host {
|
||||
default $http_x_forwarded_host;
|
||||
"" $host;
|
||||
}
|
||||
|
||||
# Map X-Forwarded-Port or fallback to server port
|
||||
map $http_x_forwarded_port $forwarded_port {
|
||||
default $http_x_forwarded_port;
|
||||
"" $server_port;
|
||||
}
|
||||
|
||||
upstream api_server {
|
||||
# fail_timeout=0 means we always retry an upstream even if it failed
|
||||
# to return a good HTTP response
|
||||
@@ -39,7 +21,8 @@ upstream web_server {
|
||||
}
|
||||
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen 80;
|
||||
server_name ${DOMAIN};
|
||||
|
||||
client_max_body_size 5G; # Maximum upload size
|
||||
|
||||
@@ -53,9 +36,8 @@ server {
|
||||
# misc headers
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $forwarded_proto;
|
||||
proxy_set_header X-Forwarded-Host $forwarded_host;
|
||||
proxy_set_header X-Forwarded-Port $forwarded_port;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
# need to use 1.1 to support chunked transfers
|
||||
@@ -72,9 +54,8 @@ server {
|
||||
# misc headers
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $forwarded_proto;
|
||||
proxy_set_header X-Forwarded-Host $forwarded_host;
|
||||
proxy_set_header X-Forwarded-Port $forwarded_port;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
proxy_http_version 1.1;
|
||||
@@ -91,25 +72,14 @@ server {
|
||||
}
|
||||
|
||||
server {
|
||||
listen 443 ssl default_server;
|
||||
listen 443 ssl;
|
||||
server_name ${DOMAIN};
|
||||
|
||||
client_max_body_size 5G; # Maximum upload size
|
||||
|
||||
location / {
|
||||
# misc headers
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
# don't use forwarded schema, host, or port here - this is the entry point
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header X-Forwarded-Port $server_port;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
proxy_http_version 1.1;
|
||||
proxy_buffering off;
|
||||
# we don't want nginx trying to do something clever with
|
||||
# redirects, we set the Host: header above already.
|
||||
proxy_redirect off;
|
||||
proxy_pass http://localhost:80;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ upstream web_server {
|
||||
}
|
||||
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen 80;
|
||||
server_name ${DOMAIN};
|
||||
|
||||
client_max_body_size 5G; # Maximum upload size
|
||||
|
||||
@@ -36,8 +37,7 @@ server {
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header X-Forwarded-Port $server_port;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
# need to use 1.1 to support chunked transfers
|
||||
@@ -55,8 +55,7 @@ server {
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header X-Forwarded-Port $server_port;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
proxy_http_version 1.1;
|
||||
|
||||
@@ -4,24 +4,6 @@ log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
'"$http_user_agent" "$http_x_forwarded_for" '
|
||||
'rt=$request_time';
|
||||
|
||||
# Map X-Forwarded-Proto or fallback to $scheme
|
||||
map $http_x_forwarded_proto $forwarded_proto {
|
||||
default $http_x_forwarded_proto;
|
||||
"" $scheme;
|
||||
}
|
||||
|
||||
# Map X-Forwarded-Host or fallback to $host
|
||||
map $http_x_forwarded_host $forwarded_host {
|
||||
default $http_x_forwarded_host;
|
||||
"" $host;
|
||||
}
|
||||
|
||||
# Map X-Forwarded-Port or fallback to server port
|
||||
map $http_x_forwarded_port $forwarded_port {
|
||||
default $http_x_forwarded_port;
|
||||
"" $server_port;
|
||||
}
|
||||
|
||||
upstream api_server {
|
||||
# fail_timeout=0 means we always retry an upstream even if it failed
|
||||
# to return a good HTTP response
|
||||
@@ -39,7 +21,8 @@ upstream web_server {
|
||||
}
|
||||
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen 80;
|
||||
server_name ${DOMAIN};
|
||||
|
||||
client_max_body_size 5G; # Maximum upload size
|
||||
|
||||
@@ -53,9 +36,8 @@ server {
|
||||
# misc headers
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $forwarded_proto;
|
||||
proxy_set_header X-Forwarded-Host $forwarded_host;
|
||||
proxy_set_header X-Forwarded-Port $forwarded_port;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
# need to use 1.1 to support chunked transfers
|
||||
@@ -72,9 +54,8 @@ server {
|
||||
# misc headers
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $forwarded_proto;
|
||||
proxy_set_header X-Forwarded-Host $forwarded_host;
|
||||
proxy_set_header X-Forwarded-Port $forwarded_port;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
proxy_http_version 1.1;
|
||||
@@ -87,25 +68,14 @@ server {
|
||||
}
|
||||
|
||||
server {
|
||||
listen 443 ssl default_server;
|
||||
listen 443 ssl;
|
||||
server_name ${DOMAIN};
|
||||
|
||||
client_max_body_size 5G; # Maximum upload size
|
||||
|
||||
location / {
|
||||
# misc headers
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
# don't use forwarded schema, host, or port here - this is the entry point
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header X-Forwarded-Port $server_port;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
proxy_http_version 1.1;
|
||||
proxy_buffering off;
|
||||
# we don't want nginx trying to do something clever with
|
||||
# redirects, we set the Host: header above already.
|
||||
proxy_redirect off;
|
||||
proxy_pass http://localhost:80;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# fill in the template
|
||||
envsubst '$SSL_CERT_FILE_NAME $SSL_CERT_KEY_FILE_NAME' < "/etc/nginx/conf.d/$1" > /etc/nginx/conf.d/app.conf
|
||||
envsubst '$DOMAIN $SSL_CERT_FILE_NAME $SSL_CERT_KEY_FILE_NAME' < "/etc/nginx/conf.d/$1" > /etc/nginx/conf.d/app.conf
|
||||
|
||||
# wait for the api_server to be ready
|
||||
echo "Waiting for API server to boot up; this may take a minute or two..."
|
||||
|
||||
@@ -240,11 +240,11 @@ export function SettingsForm() {
|
||||
/>
|
||||
|
||||
<Checkbox
|
||||
label="Agent Search"
|
||||
sublabel="If set, users will be able to use Agent Search."
|
||||
checked={settings.pro_search_enabled ?? true}
|
||||
label="Pro Search Disabled"
|
||||
sublabel="If set, users will not be able to use Pro Search."
|
||||
checked={settings.pro_search_disabled ?? false}
|
||||
onChange={(e) =>
|
||||
handleToggleSettingsField("pro_search_enabled", e.target.checked)
|
||||
handleToggleSettingsField("pro_search_disabled", e.target.checked)
|
||||
}
|
||||
/>
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ export interface Settings {
|
||||
notifications: Notification[];
|
||||
needs_reindexing: boolean;
|
||||
gpu_enabled: boolean;
|
||||
pro_search_enabled: boolean | null;
|
||||
pro_search_disabled: boolean | null;
|
||||
application_status: ApplicationStatus;
|
||||
auto_scroll: boolean;
|
||||
temperature_override_enabled: boolean;
|
||||
|
||||
@@ -23,7 +23,6 @@ import {
|
||||
SubQuestionDetail,
|
||||
constructSubQuestions,
|
||||
DocumentsResponse,
|
||||
AgenticMessageResponseIDInfo,
|
||||
} from "./interfaces";
|
||||
|
||||
import Prism from "prismjs";
|
||||
@@ -1281,8 +1280,6 @@ export function ChatPage({
|
||||
let toolCall: ToolCallMetadata | null = null;
|
||||
let isImprovement: boolean | undefined = undefined;
|
||||
let isStreamingQuestions = true;
|
||||
let includeAgentic = false;
|
||||
let secondLevelMessageId: number | null = null;
|
||||
|
||||
let initialFetchDetails: null | {
|
||||
user_message_id: number;
|
||||
@@ -1339,7 +1336,7 @@ export function ChatPage({
|
||||
searchParams.get(SEARCH_PARAM_NAMES.SYSTEM_PROMPT) || undefined,
|
||||
useExistingUserMessage: isSeededChat,
|
||||
useLanggraph:
|
||||
settings?.settings.pro_search_enabled &&
|
||||
!settings?.settings.pro_search_disabled &&
|
||||
proSearchEnabled &&
|
||||
retrievalEnabled,
|
||||
});
|
||||
@@ -1420,17 +1417,6 @@ export function ChatPage({
|
||||
resetRegenerationState();
|
||||
} else {
|
||||
const { user_message_id, frozenMessageMap } = initialFetchDetails;
|
||||
if (Object.hasOwn(packet, "agentic_message_ids")) {
|
||||
const agenticMessageIds = (packet as AgenticMessageResponseIDInfo)
|
||||
.agentic_message_ids;
|
||||
const level1MessageId = agenticMessageIds.find(
|
||||
(item) => item.level === 1
|
||||
)?.message_id;
|
||||
if (level1MessageId) {
|
||||
secondLevelMessageId = level1MessageId;
|
||||
includeAgentic = true;
|
||||
}
|
||||
}
|
||||
|
||||
setChatState((prevState) => {
|
||||
if (prevState.get(chatSessionIdRef.current!) === "loading") {
|
||||
@@ -1582,10 +1568,7 @@ export function ChatPage({
|
||||
};
|
||||
}
|
||||
);
|
||||
} else if (
|
||||
Object.hasOwn(packet, "error") &&
|
||||
(packet as any).error != null
|
||||
) {
|
||||
} else if (Object.hasOwn(packet, "error")) {
|
||||
if (
|
||||
sub_questions.length > 0 &&
|
||||
sub_questions
|
||||
@@ -1597,8 +1580,8 @@ export function ChatPage({
|
||||
setAgenticGenerating(false);
|
||||
setAlternativeGeneratingAssistant(null);
|
||||
setSubmittedMessage("");
|
||||
|
||||
throw new Error((packet as StreamingError).error);
|
||||
return;
|
||||
// throw new Error((packet as StreamingError).error);
|
||||
} else {
|
||||
error = (packet as StreamingError).error;
|
||||
stackTrace = (packet as StreamingError).stack_trace;
|
||||
@@ -1681,19 +1664,6 @@ export function ChatPage({
|
||||
second_level_generating: second_level_generating,
|
||||
agentic_docs: agenticDocs,
|
||||
},
|
||||
...(includeAgentic
|
||||
? [
|
||||
{
|
||||
messageId: secondLevelMessageId!,
|
||||
message: second_level_answer,
|
||||
type: "assistant" as const,
|
||||
files: [],
|
||||
toolCall: null,
|
||||
parentMessageId:
|
||||
initialFetchDetails.assistant_message_id!,
|
||||
},
|
||||
]
|
||||
: []),
|
||||
]);
|
||||
}
|
||||
}
|
||||
@@ -2722,11 +2692,6 @@ export function ChatPage({
|
||||
? messageHistory[i + 1]?.documents
|
||||
: undefined;
|
||||
|
||||
const nextMessage =
|
||||
messageHistory[i + 1]?.type === "assistant"
|
||||
? messageHistory[i + 1]
|
||||
: undefined;
|
||||
|
||||
return (
|
||||
<div
|
||||
className="text-text"
|
||||
@@ -2755,10 +2720,7 @@ export function ChatPage({
|
||||
selectedMessageForDocDisplay ==
|
||||
secondLevelMessage?.messageId)
|
||||
}
|
||||
isImprovement={
|
||||
message.isImprovement ||
|
||||
nextMessage?.isImprovement
|
||||
}
|
||||
isImprovement={message.isImprovement}
|
||||
secondLevelGenerating={
|
||||
(message.second_level_generating &&
|
||||
currentSessionChatState !==
|
||||
|
||||
@@ -805,12 +805,13 @@ export function ChatInputBar({
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center my-auto">
|
||||
{retrievalEnabled && settings?.settings.pro_search_enabled && (
|
||||
<AgenticToggle
|
||||
proSearchEnabled={proSearchEnabled}
|
||||
setProSearchEnabled={setProSearchEnabled}
|
||||
/>
|
||||
)}
|
||||
{retrievalEnabled &&
|
||||
!settings?.settings.pro_search_disabled && (
|
||||
<AgenticToggle
|
||||
proSearchEnabled={proSearchEnabled}
|
||||
setProSearchEnabled={setProSearchEnabled}
|
||||
/>
|
||||
)}
|
||||
<button
|
||||
id="onyx-chat-input-send-button"
|
||||
className={`cursor-pointer ${
|
||||
|
||||
@@ -155,15 +155,6 @@ export interface MessageResponseIDInfo {
|
||||
reserved_assistant_message_id: number;
|
||||
}
|
||||
|
||||
export interface AgentMessageIDInfo {
|
||||
level: number;
|
||||
message_id: number;
|
||||
}
|
||||
|
||||
export interface AgenticMessageResponseIDInfo {
|
||||
agentic_message_ids: AgentMessageIDInfo[];
|
||||
}
|
||||
|
||||
export interface DocumentsResponse {
|
||||
top_documents: OnyxDocument[];
|
||||
rephrased_query: string | null;
|
||||
|
||||
@@ -25,7 +25,6 @@ import {
|
||||
RetrievalType,
|
||||
StreamingError,
|
||||
ToolCallMetadata,
|
||||
AgenticMessageResponseIDInfo,
|
||||
} from "./interfaces";
|
||||
import { Persona } from "../admin/assistants/interfaces";
|
||||
import { ReadonlyURLSearchParams } from "next/navigation";
|
||||
@@ -155,8 +154,7 @@ export type PacketType =
|
||||
| AgentAnswerPiece
|
||||
| SubQuestionPiece
|
||||
| ExtendedToolResponse
|
||||
| RefinedAnswerImprovement
|
||||
| AgenticMessageResponseIDInfo;
|
||||
| RefinedAnswerImprovement;
|
||||
|
||||
export async function* sendMessage({
|
||||
regenerate,
|
||||
|
||||
@@ -21,9 +21,11 @@ import { fetchAssistantData } from "@/lib/chat/fetchAssistantdata";
|
||||
import { AppProvider } from "@/components/context/AppProvider";
|
||||
import { PHProvider } from "./providers";
|
||||
import { getCurrentUserSS } from "@/lib/userSS";
|
||||
import CardSection from "@/components/admin/CardSection";
|
||||
import { Suspense } from "react";
|
||||
import PostHogPageView from "./PostHogPageView";
|
||||
import Script from "next/script";
|
||||
import { LogoType } from "@/components/logo/Logo";
|
||||
import { Hanken_Grotesk } from "next/font/google";
|
||||
import { WebVitals } from "./web-vitals";
|
||||
import { ThemeProvider } from "next-themes";
|
||||
|
||||
@@ -51,7 +51,7 @@ export async function fetchSettingsSS(): Promise<CombinedSettings | null> {
|
||||
notifications: [],
|
||||
needs_reindexing: false,
|
||||
anonymous_user_enabled: false,
|
||||
pro_search_enabled: true,
|
||||
pro_search_disabled: false,
|
||||
temperature_override_enabled: true,
|
||||
};
|
||||
} else {
|
||||
@@ -95,8 +95,8 @@ export async function fetchSettingsSS(): Promise<CombinedSettings | null> {
|
||||
}
|
||||
}
|
||||
|
||||
if (settings.pro_search_enabled == null) {
|
||||
settings.pro_search_enabled = true;
|
||||
if (enterpriseSettings && settings.pro_search_disabled == null) {
|
||||
settings.pro_search_disabled = true;
|
||||
}
|
||||
|
||||
const webVersion = getWebVersion();
|
||||
|
||||
@@ -152,17 +152,7 @@ export const connectorConfigs: Record<
|
||||
],
|
||||
},
|
||||
],
|
||||
advanced_values: [
|
||||
{
|
||||
type: "checkbox",
|
||||
query: "Scroll before scraping:",
|
||||
label: "Scroll before scraping",
|
||||
description:
|
||||
"Enable if the website requires scrolling for the desired content to load",
|
||||
name: "scroll_before_scraping",
|
||||
optional: true,
|
||||
},
|
||||
],
|
||||
advanced_values: [],
|
||||
overrideDefaultFreq: 60 * 60 * 24,
|
||||
},
|
||||
github: {
|
||||
|
||||
Reference in New Issue
Block a user