Compare commits

..

17 Commits

Author SHA1 Message Date
pablonyx
411f912b9f k 2025-04-01 12:32:19 -07:00
pablonyx
7de576ce55 k 2025-04-01 12:31:40 -07:00
pablonyx
b48ebeacab update 2025-04-01 12:30:38 -07:00
pablonyx
d7a2329580 k 2025-04-01 12:04:05 -07:00
pablonyx
4db048797c k 2025-04-01 12:03:52 -07:00
pablonyx
70197e8329 k 2025-04-01 12:03:34 -07:00
pablonyx
ca0738ed65 remove logs 2025-04-01 12:03:34 -07:00
pablonyx
6a0e8a1f8c validate advanced fields + proper yup assurances for lists 2025-04-01 12:03:34 -07:00
pablonyx
f055cbcbad Very minor auth standardization (#4400) 2025-04-01 11:46:49 -07:00
pablonyx
2f3020a4d3 Update migration (#4410) 2025-04-01 09:10:24 -07:00
SubashMohan
4bae1318bb refactor tests for Highspot connector to use mocking for API key retrieval (#4346) 2025-04-01 02:39:05 +00:00
Weves
11c3f44c76 Init engine in slackbot 2025-03-31 17:04:20 -07:00
rkuo-danswer
cb38ac8a97 also set permission upsert to medium priority (#4405)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 14:59:31 -07:00
pablonyx
b2120b9f39 add user files (#4152) 2025-03-31 21:06:59 +00:00
rkuo-danswer
ccd372cc4a Bugfix/slack rate limiting (#4386)
* use slack's built in rate limit handler for the bot

* WIP

* fix the slack rate limit handler

* change default to 8

* cleanup

* try catch int conversion just in case

* linearize this logic better

* code review comments

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 21:00:26 +00:00
evan-danswer
ea30f1de1e minor improvement to fireflies connector (#4383)
* minor improvement to fireflies connector

* reduce time diff
2025-03-31 20:00:52 +00:00
evan-danswer
a7130681d9 ensure bedrock model contains API key (#4396)
* ensure bedrock model contains API key

* fix storing bug
2025-03-31 19:58:53 +00:00
39 changed files with 4747 additions and 7976 deletions

View File

@@ -0,0 +1,117 @@
"""duplicated no-harm user file migration
Revision ID: 6a804aeb4830
Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 07:26:10.539362
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import inspect
import datetime
# revision identifiers, used by Alembic.
revision = "6a804aeb4830"
down_revision = "8e1ac4f39a9f"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Check if user_file table already exists
conn = op.get_bind()
inspector = inspect(conn)
if not inspector.has_table("user_file"):
# Create user_folder table without parent_id
op.create_table(
"user_folder",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column("name", sa.String(length=255), nullable=True),
sa.Column("description", sa.String(length=255), nullable=True),
sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
sa.Column(
"created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
),
)
# Create user_file table with folder_id instead of parent_folder_id
op.create_table(
"user_file",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column(
"folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
nullable=True,
),
sa.Column("link_url", sa.String(), nullable=True),
sa.Column("token_count", sa.Integer(), nullable=True),
sa.Column("file_type", sa.String(), nullable=True),
sa.Column("file_id", sa.String(length=255), nullable=False),
sa.Column("document_id", sa.String(length=255), nullable=False),
sa.Column("name", sa.String(length=255), nullable=False),
sa.Column(
"created_at",
sa.DateTime(),
default=datetime.datetime.utcnow,
),
sa.Column(
"cc_pair_id",
sa.Integer(),
sa.ForeignKey("connector_credential_pair.id"),
nullable=True,
unique=True,
),
)
# Create persona__user_file table
op.create_table(
"persona__user_file",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_file_id",
sa.Integer(),
sa.ForeignKey("user_file.id"),
primary_key=True,
),
)
# Create persona__user_folder table
op.create_table(
"persona__user_folder",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
primary_key=True,
),
)
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
)
# Update existing records to have is_user_file=False instead of NULL
op.execute(
"UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
)
def downgrade() -> None:
pass

View File

@@ -5,9 +5,9 @@ Revises: 3781a5eb12cb
Create Date: 2025-01-26 16:08:21.551022
"""
from alembic import op
import sqlalchemy as sa
import datetime
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -36,9 +36,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/auth/saml")
# Define non-authenticated user roles that should be re-created during SAML login
NON_AUTHENTICATED_ROLES = {UserRole.SLACK_USER, UserRole.EXT_PERM_USER}
async def upsert_saml_user(email: str) -> User:
logger.debug(f"Attempting to upsert SAML user with email: {email}")
@@ -54,7 +51,7 @@ async def upsert_saml_user(email: str) -> User:
try:
user = await user_manager.get_by_email(email)
# If user has a non-authenticated role, treat as non-existent
if user.role in NON_AUTHENTICATED_ROLES:
if not user.role.is_web_login():
raise exceptions.UserNotExists()
return user
except exceptions.UserNotExists:

View File

@@ -80,7 +80,8 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
"""Runs periodically to check if any document needs syncing.
Generates sets of tasks for Celery if syncing is needed."""
# Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place
# Useful for debugging timing issues with reacquisitions.
# TODO: remove once more generalized logging is in place
task_logger.info("check_for_vespa_sync_task started")
time_start = time.monotonic()

View File

@@ -437,7 +437,7 @@ LINEAR_CLIENT_ID = os.getenv("LINEAR_CLIENT_ID")
LINEAR_CLIENT_SECRET = os.getenv("LINEAR_CLIENT_SECRET")
# Slack specific configs
SLACK_NUM_THREADS = int(os.getenv("SLACK_NUM_THREADS") or 2)
SLACK_NUM_THREADS = int(os.getenv("SLACK_NUM_THREADS") or 8)
DASK_JOB_CLIENT_ENABLED = (
os.environ.get("DASK_JOB_CLIENT_ENABLED", "").lower() == "true"

View File

@@ -45,6 +45,8 @@ _FIREFLIES_API_QUERY = """
}
"""
ONE_MINUTE = 60
def _create_doc_from_transcript(transcript: dict) -> Document | None:
sections: List[TextSection] = []
@@ -106,6 +108,8 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
)
# If not all transcripts are being indexed, try using a more-recently-generated
# API key.
class FirefliesConnector(PollConnector, LoadConnector):
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
@@ -191,6 +195,9 @@ class FirefliesConnector(PollConnector, LoadConnector):
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
# add some leeway to account for any timezone funkiness and/or bad handling
# of start time on the Fireflies side
start = max(0, start - ONE_MINUTE)
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%S.000Z"
)

View File

@@ -14,6 +14,8 @@ from typing import cast
from pydantic import BaseModel
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.http_retry import ConnectionErrorRetryHandler
from slack_sdk.http_retry import RetryHandler
from typing_extensions import override
from onyx.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS
@@ -26,6 +28,8 @@ from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
@@ -38,15 +42,16 @@ from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.connectors.slack.onyx_retry_handler import OnyxRedisSlackRetryHandler
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.connectors.slack.utils import get_message_link
from onyx.connectors.slack.utils import make_paginated_slack_api_call_w_retries
from onyx.connectors.slack.utils import make_slack_api_call_w_retries
from onyx.connectors.slack.utils import SlackTextCleaner
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
logger = setup_logger()
_SLACK_LIMIT = 900
@@ -493,9 +498,13 @@ def _process_message(
)
class SlackConnector(SlimConnector, CheckpointConnector[SlackCheckpoint]):
class SlackConnector(
SlimConnector, CredentialsConnector, CheckpointConnector[SlackCheckpoint]
):
FAST_TIMEOUT = 1
MAX_RETRIES = 7 # arbitrarily selected
def __init__(
self,
channels: list[str] | None = None,
@@ -514,16 +523,49 @@ class SlackConnector(SlimConnector, CheckpointConnector[SlackCheckpoint]):
# just used for efficiency
self.text_cleaner: SlackTextCleaner | None = None
self.user_cache: dict[str, BasicExpertInfo | None] = {}
self.credentials_provider: CredentialsProviderInterface | None = None
self.credential_prefix: str | None = None
self.delay_lock: str | None = None # the redis key for the shared lock
self.delay_key: str | None = None # the redis key for the shared delay
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
raise NotImplementedError("Use set_credentials_provider with this connector.")
def set_credentials_provider(
self, credentials_provider: CredentialsProviderInterface
) -> None:
credentials = credentials_provider.get_credentials()
tenant_id = credentials_provider.get_tenant_id()
self.redis = get_redis_client(tenant_id=tenant_id)
self.credential_prefix = (
f"connector:slack:credential_{credentials_provider.get_provider_key()}"
)
self.delay_lock = f"{self.credential_prefix}:delay_lock"
self.delay_key = f"{self.credential_prefix}:delay"
# NOTE: slack has a built in RateLimitErrorRetryHandler, but it isn't designed
# for concurrent workers. We've extended it with OnyxRedisSlackRetryHandler.
connection_error_retry_handler = ConnectionErrorRetryHandler()
onyx_rate_limit_error_retry_handler = OnyxRedisSlackRetryHandler(
max_retry_count=self.MAX_RETRIES,
delay_lock=self.delay_lock,
delay_key=self.delay_key,
r=self.redis,
)
custom_retry_handlers: list[RetryHandler] = [
connection_error_retry_handler,
onyx_rate_limit_error_retry_handler,
]
bot_token = credentials["slack_bot_token"]
self.client = WebClient(token=bot_token)
self.client = WebClient(token=bot_token, retry_handlers=custom_retry_handlers)
# use for requests that must return quickly (e.g. realtime flows where user is waiting)
self.fast_client = WebClient(
token=bot_token, timeout=SlackConnector.FAST_TIMEOUT
)
self.text_cleaner = SlackTextCleaner(client=self.client)
return None
self.credentials_provider = credentials_provider
def retrieve_all_slim_documents(
self,

View File

@@ -0,0 +1,159 @@
import math
import random
import time
from typing import cast
from typing import Optional

from redis import Redis
from redis.lock import Lock as RedisLock
from slack_sdk.http_retry.handler import RetryHandler
from slack_sdk.http_retry.request import HttpRequest
from slack_sdk.http_retry.response import HttpResponse
from slack_sdk.http_retry.state import RetryState

from onyx.utils.logger import setup_logger

logger = setup_logger()


class OnyxRedisSlackRetryHandler(RetryHandler):
    """
    This class uses Redis to share a rate limit among multiple threads.

    Threads that encounter a rate limit will observe the shared delay, increment the
    shared delay with the retry value, and use the new shared value as a wait interval.

    This has the effect of serializing calls when a rate limit is hit, which is what
    needs to happen if the server punishes us with additional limiting when we make
    a call too early. We believe this is what Slack is doing based on empirical
    observation, meaning we see indefinite hangs if we're too aggressive.

    Another way to do this is just to do exponential backoff. Might be easier?

    Adapted from slack's RateLimitErrorRetryHandler.
    """

    LOCK_TTL = 60  # used to serialize access to the retry TTL
    LOCK_BLOCKING_TIMEOUT = 60  # how long to wait for the lock

    # NOTE(review): stray statement below looks like a leftover class docstring
    # copied from slack's RateLimitErrorRetryHandler; it is a no-op expression.
    """RetryHandler that does retries for rate limited errors."""

    def __init__(
        self,
        max_retry_count: int,
        delay_lock: str,
        delay_key: str,
        r: Redis,
    ):
        """
        max_retry_count: forwarded to RetryHandler; caps the number of retries
        delay_lock: the redis key to use with RedisLock (to synchronize access to delay_key)
        delay_key: the redis key containing a shared TTL
        r: the Redis client shared by all workers participating in this limit
        """
        super().__init__(max_retry_count=max_retry_count)
        self._redis: Redis = r
        self._delay_lock = delay_lock
        self._delay_key = delay_key

    def _can_retry(
        self,
        *,
        state: RetryState,
        request: HttpRequest,
        response: Optional[HttpResponse] = None,
        error: Optional[Exception] = None,
    ) -> bool:
        # only handle HTTP 429 (rate limited); other failures are left to
        # other handlers (e.g. ConnectionErrorRetryHandler) or the caller
        return response is not None and response.status_code == 429

    def prepare_for_next_attempt(
        self,
        *,
        state: RetryState,
        request: HttpRequest,
        response: Optional[HttpResponse] = None,
        error: Optional[Exception] = None,
    ) -> None:
        """It seems this function is responsible for the wait to retry ... aka we
        actually sleep in this function."""
        retry_after_value: list[str] | None = None
        retry_after_header_name: Optional[str] = None
        duration_s: float = 1.0  # seconds; fallback wait if no usable retry-after

        if response is None:
            # NOTE(rkuo): this logic comes from RateLimitErrorRetryHandler.
            # This reads oddly, as if the caller itself could raise the exception.
            # We don't have the luxury of changing this.
            if error:
                raise error

            return

        state.next_attempt_requested = True  # this signals the caller to retry

        # calculate wait duration based on retry-after + some jitter
        # headers may use arbitrary casing, so scan for "retry-after" case-insensitively
        for k in response.headers.keys():
            if k.lower() == "retry-after":
                retry_after_header_name = k
                break

        try:
            if retry_after_header_name is None:
                # This situation usually does not arise. Just in case.
                raise ValueError(
                    "OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header name is None"
                )

            # header values are lists here (see the list[str] annotation above),
            # hence the [0] below to get the first value
            retry_after_value = response.headers.get(retry_after_header_name)
            if not retry_after_value:
                raise ValueError(
                    "OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header value is None"
                )

            retry_after_value_int = int(
                retry_after_value[0]
            )  # will raise ValueError if somehow we can't convert to int

            # add up to 25% jitter so concurrent workers don't wake simultaneously
            jitter = retry_after_value_int * 0.25 * random.random()
            duration_s = math.ceil(retry_after_value_int + jitter)
        except ValueError:
            # no/bad retry-after header: wait ~1-2s with jitter
            duration_s += random.random()

        # lock and extend the ttl
        lock: RedisLock = self._redis.lock(
            self._delay_lock,
            timeout=OnyxRedisSlackRetryHandler.LOCK_TTL,
            thread_local=False,
        )

        # NOTE(review): waits at most half of LOCK_BLOCKING_TIMEOUT — presumably
        # headroom below LOCK_TTL so a stale lock expires first; confirm intent.
        acquired = lock.acquire(
            blocking_timeout=OnyxRedisSlackRetryHandler.LOCK_BLOCKING_TIMEOUT / 2
        )

        ttl_ms: int | None = None

        try:
            if acquired:
                # if we can get the lock, then read and extend the ttl
                ttl_ms = cast(int, self._redis.pttl(self._delay_key))
                if ttl_ms < 0:  # negative values are error status codes ... see docs
                    ttl_ms = 0
                ttl_ms_new = ttl_ms + int(duration_s * 1000.0)
                self._redis.set(self._delay_key, "1", px=ttl_ms_new)
            else:
                # if we can't get the lock, just go ahead.
                # TODO: if we know our actual parallelism, multiplying by that
                # would be a pretty good idea
                ttl_ms_new = int(duration_s * 1000.0)
        finally:
            if acquired:
                lock.release()

        logger.warning(
            f"OnyxRedisSlackRetryHandler.prepare_for_next_attempt wait: "
            f"retry-after={retry_after_value} "
            f"shared_delay_ms={ttl_ms} new_shared_delay_ms={ttl_ms_new}"
        )

        # TODO: would be good to take an event var and sleep in short increments to
        # allow for a clean exit / exception
        time.sleep(ttl_ms_new / 1000.0)

        state.increment_current_attempt()

View File

@@ -1,5 +1,4 @@
import re
import time
from collections.abc import Callable
from collections.abc import Generator
from functools import lru_cache
@@ -64,71 +63,72 @@ def _make_slack_api_call_paginated(
return paginated_call
def make_slack_api_rate_limited(
call: Callable[..., SlackResponse], max_retries: int = 7
) -> Callable[..., SlackResponse]:
"""Wraps calls to slack API so that they automatically handle rate limiting"""
# NOTE(rkuo): we may not need this any more if the integrated retry handlers work as
# expected. Do we want to keep this around?
@wraps(call)
def rate_limited_call(**kwargs: Any) -> SlackResponse:
last_exception = None
# def make_slack_api_rate_limited(
# call: Callable[..., SlackResponse], max_retries: int = 7
# ) -> Callable[..., SlackResponse]:
# """Wraps calls to slack API so that they automatically handle rate limiting"""
for _ in range(max_retries):
try:
# Make the API call
response = call(**kwargs)
# @wraps(call)
# def rate_limited_call(**kwargs: Any) -> SlackResponse:
# last_exception = None
# Check for errors in the response, will raise `SlackApiError`
# if anything went wrong
response.validate()
return response
# for _ in range(max_retries):
# try:
# # Make the API call
# response = call(**kwargs)
except SlackApiError as e:
last_exception = e
try:
error = e.response["error"]
except KeyError:
error = "unknown error"
# # Check for errors in the response, will raise `SlackApiError`
# # if anything went wrong
# response.validate()
# return response
if error == "ratelimited":
# Handle rate limiting: get the 'Retry-After' header value and sleep for that duration
retry_after = int(e.response.headers.get("Retry-After", 1))
logger.info(
f"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}"
)
time.sleep(retry_after)
elif error in ["already_reacted", "no_reaction", "internal_error"]:
# Log internal_error and return the response instead of failing
logger.warning(
f"Slack call encountered '{error}', skipping and continuing..."
)
return e.response
else:
# Raise the error for non-transient errors
raise
# except SlackApiError as e:
# last_exception = e
# try:
# error = e.response["error"]
# except KeyError:
# error = "unknown error"
# If the code reaches this point, all retries have been exhausted
msg = f"Max retries ({max_retries}) exceeded"
if last_exception:
raise Exception(msg) from last_exception
else:
raise Exception(msg)
# if error == "ratelimited":
# # Handle rate limiting: get the 'Retry-After' header value and sleep for that duration
# retry_after = int(e.response.headers.get("Retry-After", 1))
# logger.info(
# f"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}"
# )
# time.sleep(retry_after)
# elif error in ["already_reacted", "no_reaction", "internal_error"]:
# # Log internal_error and return the response instead of failing
# logger.warning(
# f"Slack call encountered '{error}', skipping and continuing..."
# )
# return e.response
# else:
# # Raise the error for non-transient errors
# raise
return rate_limited_call
# # If the code reaches this point, all retries have been exhausted
# msg = f"Max retries ({max_retries}) exceeded"
# if last_exception:
# raise Exception(msg) from last_exception
# else:
# raise Exception(msg)
# return rate_limited_call
def make_slack_api_call_w_retries(
call: Callable[..., SlackResponse], **kwargs: Any
) -> SlackResponse:
return basic_retry_wrapper(make_slack_api_rate_limited(call))(**kwargs)
return basic_retry_wrapper(call)(**kwargs)
def make_paginated_slack_api_call_w_retries(
call: Callable[..., SlackResponse], **kwargs: Any
) -> Generator[dict[str, Any], None, None]:
return _make_slack_api_call_paginated(
basic_retry_wrapper(make_slack_api_rate_limited(call))
)(**kwargs)
return _make_slack_api_call_paginated(basic_retry_wrapper(call))(**kwargs)
def expert_info_from_slack_id(
@@ -142,7 +142,7 @@ def expert_info_from_slack_id(
if user_id in user_cache:
return user_cache[user_id]
response = make_slack_api_rate_limited(client.users_info)(user=user_id)
response = client.users_info(user=user_id)
if not response["ok"]:
user_cache[user_id] = None
@@ -175,9 +175,7 @@ class SlackTextCleaner:
def _get_slack_name(self, user_id: str) -> str:
if user_id not in self._id_to_name_map:
try:
response = make_slack_api_rate_limited(self._client.users_info)(
user=user_id
)
response = self._client.users_info(user=user_id)
# prefer display name if set, since that is what is shown in Slack
self._id_to_name_map[user_id] = (
response["user"]["profile"]["display_name"]

View File

@@ -42,6 +42,7 @@ from onyx.context.search.retrieval.search_runner import (
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import SqlEngine
from onyx.db.models import SlackBot
from onyx.db.search_settings import get_current_search_settings
from onyx.db.slack_bot import fetch_slack_bot
@@ -972,6 +973,9 @@ def _get_socket_client(
if __name__ == "__main__":
# Initialize the SqlEngine
SqlEngine.init_engine(pool_size=20, max_overflow=5)
# Initialize the tenant handler which will manage tenant connections
logger.info("Starting SlackbotHandler")
tenant_handler = SlackbotHandler()

View File

@@ -195,7 +195,7 @@ class RedisConnectorPermissionSync:
),
queue=OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT,
task_id=custom_task_id,
priority=OnyxCeleryPriority.HIGH,
priority=OnyxCeleryPriority.MEDIUM,
ignore_result=True,
)
async_results.append(result)

View File

@@ -125,6 +125,7 @@ class TenantRedis(redis.Redis):
"hset",
"hdel",
"ttl",
"pttl",
] # Regular methods that need simple prefixing
if item == "scan_iter" or item == "sscan_iter":

View File

@@ -2,6 +2,8 @@ import json
import os
import time
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
@@ -40,10 +42,13 @@ def highspot_connector() -> HighspotConnector:
return connector
@pytest.mark.xfail(
reason="Accessing postgres that isn't available in connector only tests",
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None:
def test_highspot_connector_basic(
mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
) -> None:
"""Test basic functionality of the Highspot connector."""
all_docs: list[Document] = []
test_data = load_test_data()
@@ -76,10 +81,13 @@ def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None
assert len(section.text) > 0
@pytest.mark.xfail(
reason="Possibly accessing postgres that isn't available in connector only tests",
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_highspot_connector_slim(highspot_connector: HighspotConnector) -> None:
def test_highspot_connector_slim(
mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
) -> None:
"""Test slim document retrieval."""
# Get all doc IDs from the full connector
all_full_doc_ids = set()

View File

@@ -42,7 +42,6 @@ ENV NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING}
ARG NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA
ENV NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA}
# allow user to specify custom feedback options
ARG NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS
ENV NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS}

12090
web/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -93,11 +93,12 @@
},
"devDependencies": {
"@chromatic-com/playwright": "^0.10.2",
"@playwright/test": "^1.39.0",
"@tailwindcss/typography": "^0.5.10",
"@types/chrome": "^0.0.287",
"@types/jest": "^29.5.14",
"chromatic": "^11.25.2",
"eslint": "^8.48.0",
"eslint": "^8.57.1",
"eslint-config-next": "^14.1.0",
"jest": "^29.7.0",
"prettier": "2.8.8",

View File

@@ -17,7 +17,7 @@ export default function PostHogPageView(): null {
// Track pageviews
if (pathname) {
let url = window.origin + pathname;
if (searchParams.toString()) {
if (searchParams?.toString()) {
url = url + `?${searchParams.toString()}`;
}
posthog.capture("$pageview", {

View File

@@ -149,7 +149,7 @@ export function AssistantEditor({
const { refreshAssistants, isImageGenerationAvailable } = useAssistants();
const router = useRouter();
const searchParams = useSearchParams();
const isAdminPage = searchParams.get("admin") === "true";
const isAdminPage = searchParams?.get("admin") === "true";
const { popup, setPopup } = usePopup();
const { labels, refreshLabels, createLabel, updateLabel, deleteLabel } =
@@ -469,8 +469,14 @@ export function AssistantEditor({
description: Yup.string().required(
"Must provide a description for the Assistant"
),
system_prompt: Yup.string(),
task_prompt: Yup.string(),
system_prompt: Yup.string().max(
8000,
"Instructions must be less than 8000 characters"
),
task_prompt: Yup.string().max(
8000,
"Reminders must be less than 8000 characters"
),
is_public: Yup.boolean().required(),
document_set_ids: Yup.array().of(Yup.number()),
num_chunks: Yup.number().nullable(),

View File

@@ -302,11 +302,17 @@ export default function AddConnector({
...connector_specific_config
} = values;
// Apply transforms from connectors.ts configuration
// Apply special transforms according to application logic
const transformedConnectorSpecificConfig = Object.entries(
connector_specific_config
).reduce(
(acc, [key, value]) => {
// Filter out empty strings from arrays
if (Array.isArray(value)) {
value = (value as any[]).filter(
(item) => typeof item !== "string" || item.trim() !== ""
);
}
const matchingConfigValue = configuration.values.find(
(configValue) => configValue.name === key
);

View File

@@ -26,8 +26,8 @@ export default function OAuthCallbackPage() {
);
// Extract query parameters
const code = searchParams.get("code");
const state = searchParams.get("state");
const code = searchParams?.get("code");
const state = searchParams?.get("state");
const pathname = usePathname();
const connector = pathname?.split("/")[3];

View File

@@ -4,7 +4,6 @@ import { useEffect, useState } from "react";
import { usePathname, useRouter, useSearchParams } from "next/navigation";
import { AdminPageTitle } from "@/components/admin/Title";
import { Button } from "@/components/ui/button";
import Title from "@/components/ui/title";
import { KeyIcon } from "@/components/icons/icons";
import { getSourceMetadata, isValidSource } from "@/lib/sources";
import { ConfluenceAccessibleResource, ValidSources } from "@/lib/types";
@@ -74,7 +73,7 @@ export default function OAuthFinalizePage() {
>([]);
// Extract query parameters
const credentialParam = searchParams.get("credential");
const credentialParam = searchParams?.get("credential");
const credential = credentialParam ? parseInt(credentialParam, 10) : NaN;
const pathname = usePathname();
const connector = pathname?.split("/")[3];
@@ -85,7 +84,7 @@ export default function OAuthFinalizePage() {
// connector (url segment)= "google-drive"
// sourceType (for looking up metadata) = "google_drive"
if (isNaN(credential)) {
if (isNaN(credential) || !connector) {
setStatusMessage("Improperly formed OAuth finalization request.");
setStatusDetails("Invalid or missing credential id.");
setIsError(true);

View File

@@ -76,7 +76,7 @@ const RerankingDetailsForm = forwardRef<
function (value) {
const { rerank_provider_type } = this.parent;
return (
rerank_provider_type !== RerankerProvider.COHERE ||
rerank_provider_type === RerankerProvider.LITELLM ||
(value !== null && value !== "")
);
}
@@ -457,7 +457,7 @@ const RerankingDetailsForm = forwardRef<
...values,
rerank_api_key: value,
});
setFieldValue("api_key", value);
setFieldValue("rerank_api_key", value);
}}
type="password"
label={

View File

@@ -23,8 +23,8 @@ const ResetPasswordPage: React.FC = () => {
const { popup, setPopup } = usePopup();
const [isWorking, setIsWorking] = useState(false);
const searchParams = useSearchParams();
const token = searchParams.get("token");
const tenantId = searchParams.get(TENANT_ID_COOKIE_NAME);
const token = searchParams?.get("token");
const tenantId = searchParams?.get(TENANT_ID_COOKIE_NAME);
// Keep search param same name as cookie for simplicity
useEffect(() => {

View File

@@ -15,9 +15,9 @@ export function Verify({ user }: { user: User | null }) {
const [error, setError] = useState("");
const verify = useCallback(async () => {
const token = searchParams.get("token");
const token = searchParams?.get("token");
const firstUser =
searchParams.get("first_user") && NEXT_PUBLIC_CLOUD_ENABLED;
searchParams?.get("first_user") && NEXT_PUBLIC_CLOUD_ENABLED;
if (!token) {
setError(
"Missing verification token. Try requesting a new verification email."

View File

@@ -196,7 +196,9 @@ export function ChatPage({
setCurrentMessageFiles,
} = useDocumentsContext();
const defaultAssistantIdRaw = searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID);
const defaultAssistantIdRaw = searchParams?.get(
SEARCH_PARAM_NAMES.PERSONA_ID
);
const defaultAssistantId = defaultAssistantIdRaw
? parseInt(defaultAssistantIdRaw)
: undefined;
@@ -252,8 +254,8 @@ export function ChatPage({
);
const { user, isAdmin } = useUser();
const slackChatId = searchParams.get("slackChatId");
const existingChatIdRaw = searchParams.get("chatId");
const slackChatId = searchParams?.get("slackChatId");
const existingChatIdRaw = searchParams?.get("chatId");
const [showHistorySidebar, setShowHistorySidebar] = useState(false);
@@ -275,7 +277,7 @@ export function ChatPage({
const processSearchParamsAndSubmitMessage = (searchParamsString: string) => {
const newSearchParams = new URLSearchParams(searchParamsString);
const message = newSearchParams.get("user-prompt");
const message = newSearchParams?.get("user-prompt");
filterManager.buildFiltersFromQueryString(
newSearchParams.toString(),
@@ -284,7 +286,7 @@ export function ChatPage({
tags
);
const fileDescriptorString = newSearchParams.get(SEARCH_PARAM_NAMES.FILES);
const fileDescriptorString = newSearchParams?.get(SEARCH_PARAM_NAMES.FILES);
const overrideFileDescriptors: FileDescriptor[] = fileDescriptorString
? JSON.parse(decodeURIComponent(fileDescriptorString))
: [];
@@ -324,7 +326,7 @@ export function ChatPage({
: undefined
);
// Gather default temperature settings
const search_param_temperature = searchParams.get(
const search_param_temperature = searchParams?.get(
SEARCH_PARAM_NAMES.TEMPERATURE
);
@@ -551,7 +553,7 @@ export function ChatPage({
if (
newMessageHistory.length === 1 &&
!submitOnLoadPerformed.current &&
searchParams.get(SEARCH_PARAM_NAMES.SEEDED) === "true"
searchParams?.get(SEARCH_PARAM_NAMES.SEEDED) === "true"
) {
submitOnLoadPerformed.current = true;
const seededMessage = newMessageHistory[0].message;
@@ -572,11 +574,11 @@ export function ChatPage({
initialSessionFetch();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [existingChatSessionId, searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID)]);
}, [existingChatSessionId, searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID)]);
useEffect(() => {
const userFolderId = searchParams.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID);
const allMyDocuments = searchParams.get(
const userFolderId = searchParams?.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID);
const allMyDocuments = searchParams?.get(
SEARCH_PARAM_NAMES.ALL_MY_DOCUMENTS
);
@@ -599,14 +601,14 @@ export function ChatPage({
}
}, [
userFolders,
searchParams.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID),
searchParams.get(SEARCH_PARAM_NAMES.ALL_MY_DOCUMENTS),
searchParams?.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID),
searchParams?.get(SEARCH_PARAM_NAMES.ALL_MY_DOCUMENTS),
addSelectedFolder,
clearSelectedItems,
]);
const [message, setMessage] = useState(
searchParams.get(SEARCH_PARAM_NAMES.USER_PROMPT) || ""
searchParams?.get(SEARCH_PARAM_NAMES.USER_PROMPT) || ""
);
const [completeMessageDetail, setCompleteMessageDetail] = useState<
@@ -1048,7 +1050,7 @@ export function ChatPage({
// Equivalent to `loadNewPageLogic`
useEffect(() => {
if (searchParams.get(SEARCH_PARAM_NAMES.SEND_ON_LOAD)) {
if (searchParams?.get(SEARCH_PARAM_NAMES.SEND_ON_LOAD)) {
processSearchParamsAndSubmitMessage(searchParams.toString());
}
}, [searchParams, router]);
@@ -1231,7 +1233,7 @@ export function ChatPage({
const isNewSession = chatSessionIdRef.current === null;
const searchParamBasedChatSessionName =
searchParams.get(SEARCH_PARAM_NAMES.TITLE) || null;
searchParams?.get(SEARCH_PARAM_NAMES.TITLE) || null;
if (isNewSession) {
currChatSessionId = await createChatSession(
@@ -1409,11 +1411,11 @@ export function ChatPage({
modelVersion:
modelOverride?.modelName ||
llmManager.currentLlm.modelName ||
searchParams.get(SEARCH_PARAM_NAMES.MODEL_VERSION) ||
searchParams?.get(SEARCH_PARAM_NAMES.MODEL_VERSION) ||
undefined,
temperature: llmManager.temperature || undefined,
systemPromptOverride:
searchParams.get(SEARCH_PARAM_NAMES.SYSTEM_PROMPT) || undefined,
searchParams?.get(SEARCH_PARAM_NAMES.SYSTEM_PROMPT) || undefined,
useExistingUserMessage: isSeededChat,
useLanggraph:
settings?.settings.pro_search_enabled &&

View File

@@ -668,7 +668,7 @@ const PARAMS_TO_SKIP = [
];
export function buildChatUrl(
existingSearchParams: ReadonlyURLSearchParams,
existingSearchParams: ReadonlyURLSearchParams | null,
chatSessionId: string | null,
personaId: number | null,
search?: boolean
@@ -685,7 +685,7 @@ export function buildChatUrl(
finalSearchParams.push(`${SEARCH_PARAM_NAMES.PERSONA_ID}=${personaId}`);
}
existingSearchParams.forEach((value, key) => {
existingSearchParams?.forEach((value, key) => {
if (!PARAMS_TO_SKIP.includes(key)) {
finalSearchParams.push(`${key}=${value}`);
}
@@ -719,7 +719,7 @@ export async function uploadFilesForChat(
return [responseJson.files as FileDescriptor[], null];
}
export async function useScrollonStream({
export function useScrollonStream({
chatState,
scrollableDivRef,
scrollDist,
@@ -817,5 +817,5 @@ export async function useScrollonStream({
});
}
}
}, [chatState, distance, scrollDist, scrollableDivRef]);
}, [chatState, distance, scrollDist, scrollableDivRef, enableAutoScroll]);
}

View File

@@ -23,8 +23,10 @@ export const SEARCH_PARAM_NAMES = {
SEND_ON_LOAD: "send-on-load",
};
export function shouldSubmitOnLoad(searchParams: ReadonlyURLSearchParams) {
const rawSubmitOnLoad = searchParams.get(SEARCH_PARAM_NAMES.SUBMIT_ON_LOAD);
export function shouldSubmitOnLoad(
searchParams: ReadonlyURLSearchParams | null
) {
const rawSubmitOnLoad = searchParams?.get(SEARCH_PARAM_NAMES.SUBMIT_ON_LOAD);
if (rawSubmitOnLoad === "true" || rawSubmitOnLoad === "1") {
return true;
}

View File

@@ -104,7 +104,7 @@ export function UserDropdown({
// Construct the current URL
const currentUrl = `${pathname}${
searchParams.toString() ? `?${searchParams.toString()}` : ""
searchParams?.toString() ? `?${searchParams.toString()}` : ""
}`;
// Encode the current URL to use as a redirect parameter

View File

@@ -59,8 +59,8 @@ export function ClientLayout({
const { llmProviders } = useChatContext();
const { popup, setPopup } = usePopup();
if (
pathname.startsWith("/admin/connectors") ||
pathname.startsWith("/admin/embeddings")
(pathname && pathname.startsWith("/admin/connectors")) ||
(pathname && pathname.startsWith("/admin/embeddings"))
) {
return <>{children}</>;
}

View File

@@ -76,7 +76,7 @@ export const ChatProvider: React.FC<{
const { sessions } = await response.json();
setChatSessions(sessions);
const currentSessionId = searchParams.get("chatId");
const currentSessionId = searchParams?.get("chatId");
if (
currentSessionId &&
!sessions.some(

View File

@@ -34,7 +34,7 @@ export const EmbeddingFormProvider: React.FC<{
const pathname = usePathname();
// Initialize formStep based on the URL parameter
const initialStep = parseInt(searchParams.get("step") || "0", 10);
const initialStep = parseInt(searchParams?.get("step") || "0", 10);
const [formStep, setFormStep] = useState(initialStep);
const [formValues, setFormValues] = useState<Record<string, any>>({});
@@ -56,8 +56,10 @@ export const EmbeddingFormProvider: React.FC<{
useEffect(() => {
// Update URL when formStep changes
const updatedSearchParams = new URLSearchParams(searchParams.toString());
const existingStep = updatedSearchParams.get("step");
const updatedSearchParams = new URLSearchParams(
searchParams?.toString() || ""
);
const existingStep = updatedSearchParams?.get("step");
updatedSearchParams.set("step", formStep.toString());
const newUrl = `${pathname}?${updatedSearchParams.toString()}`;
@@ -70,7 +72,7 @@ export const EmbeddingFormProvider: React.FC<{
// Update formStep when URL changes
useEffect(() => {
const stepFromUrl = parseInt(searchParams.get("step") || "0", 10);
const stepFromUrl = parseInt(searchParams?.get("step") || "0", 10);
if (stepFromUrl !== formStep) {
setFormStep(stepFromUrl);
}

View File

@@ -34,7 +34,7 @@ export const FormProvider: React.FC<{
const pathname = usePathname();
// Initialize formStep based on the URL parameter
const initialStep = parseInt(searchParams.get("step") || "0", 10);
const initialStep = parseInt(searchParams?.get("step") || "0", 10);
const [formStep, setFormStep] = useState(initialStep);
const [formValues, setFormValues] = useState<Record<string, any>>({});
@@ -56,8 +56,10 @@ export const FormProvider: React.FC<{
useEffect(() => {
// Update URL when formStep changes
const updatedSearchParams = new URLSearchParams(searchParams.toString());
const existingStep = updatedSearchParams.get("step");
const updatedSearchParams = new URLSearchParams(
searchParams?.toString() || ""
);
const existingStep = updatedSearchParams?.get("step");
updatedSearchParams.set("step", formStep.toString());
const newUrl = `${pathname}?${updatedSearchParams.toString()}`;
@@ -69,7 +71,7 @@ export const FormProvider: React.FC<{
}, [formStep, router, pathname, searchParams]);
useEffect(() => {
const stepFromUrl = parseInt(searchParams.get("step") || "0", 10);
const stepFromUrl = parseInt(searchParams?.get("step") || "0", 10);
if (stepFromUrl !== formStep) {
setFormStep(stepFromUrl);
}

View File

@@ -35,7 +35,7 @@ export const HealthCheckBanner = () => {
useEffect(() => {
if (userError && userError.status === 403) {
logout().then(() => {
if (!pathname.includes("/auth")) {
if (!pathname?.includes("/auth")) {
setShowLoggedOutModal(true);
}
});
@@ -61,7 +61,7 @@ export const HealthCheckBanner = () => {
expirationTimeoutRef.current = setTimeout(() => {
setExpired(true);
if (!pathname.includes("/auth")) {
if (!pathname?.includes("/auth")) {
setShowLoggedOutModal(true);
}
}, timeUntilExpire);
@@ -205,7 +205,7 @@ export const HealthCheckBanner = () => {
}
if (error instanceof RedirectError || expired) {
if (!pathname.includes("/auth")) {
if (!pathname?.includes("/auth")) {
setShowLoggedOutModal(true);
}
return null;

View File

@@ -19,12 +19,12 @@ function setWelcomeFlowComplete() {
Cookies.set(COMPLETED_WELCOME_FLOW_COOKIE, "true", { expires: 365 });
}
export function _CompletedWelcomeFlowDummyComponent() {
export function CompletedWelcomeFlowDummyComponent() {
setWelcomeFlowComplete();
return null;
}
export function _WelcomeModal({ user }: { user: User | null }) {
export function WelcomeModal({ user }: { user: User | null }) {
const router = useRouter();
const [providerOptions, setProviderOptions] = useState<

View File

@@ -1,6 +1,6 @@
import {
_CompletedWelcomeFlowDummyComponent,
_WelcomeModal,
CompletedWelcomeFlowDummyComponent,
WelcomeModal as WelcomeModalComponent,
} from "./WelcomeModal";
import { COMPLETED_WELCOME_FLOW_COOKIE } from "./constants";
import { User } from "@/lib/types";
@@ -24,8 +24,8 @@ export function WelcomeModal({
}) {
const hasCompletedWelcomeFlow = hasCompletedWelcomeFlowSS(requestCookies);
if (hasCompletedWelcomeFlow) {
return <_CompletedWelcomeFlowDummyComponent />;
return <CompletedWelcomeFlowDummyComponent />;
}
return <_WelcomeModal user={user} />;
return <WelcomeModalComponent user={user} />;
}

View File

@@ -31,13 +31,13 @@ export function NewTeamModal() {
const { setPopup } = usePopup();
useEffect(() => {
const hasNewTeamParam = searchParams.has("new_team");
const hasNewTeamParam = searchParams?.has("new_team");
if (hasNewTeamParam) {
setShowNewTeamModal(true);
fetchTenantInfo();
// Remove the new_team parameter from the URL without page reload
const newParams = new URLSearchParams(searchParams.toString());
const newParams = new URLSearchParams(searchParams?.toString() || "");
newParams.delete("new_team");
const newUrl =
window.location.pathname +

View File

@@ -16,7 +16,7 @@ export const usePopupFromQuery = (messages: PopupMessages) => {
const searchParams = new URLSearchParams(window.location.search);
// Get the value for search param with key "message"
const messageValue = searchParams.get("message");
const messageValue = searchParams?.get("message");
// Check if any key from messages object is present in search params
if (messageValue && messageValue in messages) {

View File

@@ -148,7 +148,7 @@ function usePaginatedFetch<T extends PaginatedType>({
// Updates the URL with the current page number
const updatePageUrl = useCallback(
(page: number) => {
if (currentPath) {
if (currentPath && searchParams) {
const params = new URLSearchParams(searchParams);
params.set("page", page.toString());
router.replace(`${currentPath}?${params.toString()}`, {

View File

@@ -1333,10 +1333,10 @@ export function createConnectorValidationSchema(
): Yup.ObjectSchema<Record<string, any>> {
const configuration = connectorConfigs[connector];
return Yup.object().shape({
const object = Yup.object().shape({
access_type: Yup.string().required("Access Type is required"),
name: Yup.string().required("Connector Name is required"),
...configuration.values.reduce(
...[...configuration.values, ...configuration.advanced_values].reduce(
(acc, field) => {
let schema: any =
field.type === "select"
@@ -1363,6 +1363,8 @@ export function createConnectorValidationSchema(
pruneFreq: Yup.number().min(0, "Prune frequency must be non-negative"),
refreshFreq: Yup.number().min(0, "Refresh frequency must be non-negative"),
});
return object;
}
export const defaultPruneFreqDays = 30; // 30 days