fix

Update migration (#4410 )
refactor tests for Highspot connector to use mocking for API key retrieval (#4346 )
2026-02-26 12:15:48 +00:00 · 2025-04-01 11:27:58 -07:00 · 2025-04-01 09:10:24 -07:00 · 2025-04-01 02:39:05 +00:00 · 2025-03-31 17:04:20 -07:00 · 2025-03-31 14:59:31 -07:00
16 changed files with 6163 additions and 6097 deletions
--- a/backend/alembic/versions/6a804aeb4830_duplicated_no_harm_user_file_migration.py
+++ b/backend/alembic/versions/6a804aeb4830_duplicated_no_harm_user_file_migration.py
@@ -0,0 +1,117 @@
+"""duplicated no-harm user file migration
+
+Revision ID: 6a804aeb4830
+Revises: 8e1ac4f39a9f
+Create Date: 2025-04-01 07:26:10.539362
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import inspect
+import datetime
+
+
+# revision identifiers, used by Alembic.
+revision = "6a804aeb4830"
+down_revision = "8e1ac4f39a9f"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Check if user_file table already exists
+    conn = op.get_bind()
+    inspector = inspect(conn)
+
+    if not inspector.has_table("user_file"):
+        # Create user_folder table without parent_id
+        op.create_table(
+            "user_folder",
+            sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+            sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
+            sa.Column("name", sa.String(length=255), nullable=True),
+            sa.Column("description", sa.String(length=255), nullable=True),
+            sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
+            sa.Column(
+                "created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
+            ),
+        )
+
+        # Create user_file table with folder_id instead of parent_folder_id
+        op.create_table(
+            "user_file",
+            sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+            sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
+            sa.Column(
+                "folder_id",
+                sa.Integer(),
+                sa.ForeignKey("user_folder.id"),
+                nullable=True,
+            ),
+            sa.Column("link_url", sa.String(), nullable=True),
+            sa.Column("token_count", sa.Integer(), nullable=True),
+            sa.Column("file_type", sa.String(), nullable=True),
+            sa.Column("file_id", sa.String(length=255), nullable=False),
+            sa.Column("document_id", sa.String(length=255), nullable=False),
+            sa.Column("name", sa.String(length=255), nullable=False),
+            sa.Column(
+                "created_at",
+                sa.DateTime(),
+                default=datetime.datetime.utcnow,
+            ),
+            sa.Column(
+                "cc_pair_id",
+                sa.Integer(),
+                sa.ForeignKey("connector_credential_pair.id"),
+                nullable=True,
+                unique=True,
+            ),
+        )
+
+        # Create persona__user_file table
+        op.create_table(
+            "persona__user_file",
+            sa.Column(
+                "persona_id",
+                sa.Integer(),
+                sa.ForeignKey("persona.id"),
+                primary_key=True,
+            ),
+            sa.Column(
+                "user_file_id",
+                sa.Integer(),
+                sa.ForeignKey("user_file.id"),
+                primary_key=True,
+            ),
+        )
+
+        # Create persona__user_folder table
+        op.create_table(
+            "persona__user_folder",
+            sa.Column(
+                "persona_id",
+                sa.Integer(),
+                sa.ForeignKey("persona.id"),
+                primary_key=True,
+            ),
+            sa.Column(
+                "user_folder_id",
+                sa.Integer(),
+                sa.ForeignKey("user_folder.id"),
+                primary_key=True,
+            ),
+        )
+
+        op.add_column(
+            "connector_credential_pair",
+            sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
+        )
+
+        # Update existing records to have is_user_file=False instead of NULL
+        op.execute(
+            "UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
+        )
+
+
+def downgrade() -> None:
+    pass
--- a/backend/alembic/versions/9aadf32dfeb4_add_user_files.py
+++ b/backend/alembic/versions/9aadf32dfeb4_add_user_files.py
@@ -5,9 +5,9 @@ Revises: 3781a5eb12cb
 Create Date: 2025-01-26 16:08:21.551022

 """
-from alembic import op
 import sqlalchemy as sa
 import datetime
+from alembic import op


 # revision identifiers, used by Alembic.
--- a/backend/alembic/versions/e995bdf0d6f7_max_length_for_instruction_system_prompt.py
+++ b/backend/alembic/versions/e995bdf0d6f7_max_length_for_instruction_system_prompt.py
@@ -0,0 +1,52 @@
+"""max_length_for_instruction_system_prompt
+
+Revision ID: e995bdf0d6f7
+Revises: 8e1ac4f39a9f
+Create Date: 2025-04-01 18:32:45.123456
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "e995bdf0d6f7"
+down_revision = "8e1ac4f39a9f"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Alter system_prompt and task_prompt columns to have a maximum length of 8000 characters
+    op.alter_column(
+        "prompt",
+        "system_prompt",
+        existing_type=sa.Text(),
+        type_=sa.String(8000),
+        existing_nullable=False,
+    )
+    op.alter_column(
+        "prompt",
+        "task_prompt",
+        existing_type=sa.Text(),
+        type_=sa.String(8000),
+        existing_nullable=False,
+    )
+
+
+def downgrade() -> None:
+    # Revert system_prompt and task_prompt columns back to Text type
+    op.alter_column(
+        "prompt",
+        "system_prompt",
+        existing_type=sa.String(8000),
+        type_=sa.Text(),
+        existing_nullable=False,
+    )
+    op.alter_column(
+        "prompt",
+        "task_prompt",
+        existing_type=sa.String(8000),
+        type_=sa.Text(),
+        existing_nullable=False,
+    )
--- a/backend/onyx/background/celery/tasks/vespa/tasks.py
+++ b/backend/onyx/background/celery/tasks/vespa/tasks.py
@@ -80,7 +80,8 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
    """Runs periodically to check if any document needs syncing.
    Generates sets of tasks for Celery if syncing is needed."""

-    # Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place
+    # Useful for debugging timing issues with reacquisitions.
+    # TODO: remove once more generalized logging is in place
    task_logger.info("check_for_vespa_sync_task started")

    time_start = time.monotonic()
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -437,7 +437,7 @@ LINEAR_CLIENT_ID = os.getenv("LINEAR_CLIENT_ID")
 LINEAR_CLIENT_SECRET = os.getenv("LINEAR_CLIENT_SECRET")

 # Slack specific configs
-SLACK_NUM_THREADS = int(os.getenv("SLACK_NUM_THREADS") or 2)
+SLACK_NUM_THREADS = int(os.getenv("SLACK_NUM_THREADS") or 8)

 DASK_JOB_CLIENT_ENABLED = (
    os.environ.get("DASK_JOB_CLIENT_ENABLED", "").lower() == "true"
--- a/backend/onyx/connectors/fireflies/connector.py
+++ b/backend/onyx/connectors/fireflies/connector.py
@@ -45,6 +45,8 @@ _FIREFLIES_API_QUERY = """
    }
 """

+ONE_MINUTE = 60
+

 def _create_doc_from_transcript(transcript: dict) -> Document | None:
    sections: List[TextSection] = []
@@ -106,6 +108,8 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
    )


+# If not all transcripts are being indexed, try using a more-recently-generated
+# API key.
 class FirefliesConnector(PollConnector, LoadConnector):
    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        self.batch_size = batch_size
@@ -191,6 +195,9 @@ class FirefliesConnector(PollConnector, LoadConnector):
    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
+        # add some leeway to account for any timezone funkiness and/or bad handling
+        # of start time on the Fireflies side
+        start = max(0, start - ONE_MINUTE)
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S.000Z"
        )
--- a/backend/onyx/connectors/slack/connector.py
+++ b/backend/onyx/connectors/slack/connector.py
@@ -14,6 +14,8 @@ from typing import cast
 from pydantic import BaseModel
 from slack_sdk import WebClient
 from slack_sdk.errors import SlackApiError
+from slack_sdk.http_retry import ConnectionErrorRetryHandler
+from slack_sdk.http_retry import RetryHandler
 from typing_extensions import override

 from onyx.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS
@@ -26,6 +28,8 @@ from onyx.connectors.exceptions import InsufficientPermissionsError
 from onyx.connectors.exceptions import UnexpectedValidationError
 from onyx.connectors.interfaces import CheckpointConnector
 from onyx.connectors.interfaces import CheckpointOutput
+from onyx.connectors.interfaces import CredentialsConnector
+from onyx.connectors.interfaces import CredentialsProviderInterface
 from onyx.connectors.interfaces import GenerateSlimDocumentOutput
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
 from onyx.connectors.interfaces import SlimConnector
@@ -38,15 +42,16 @@ from onyx.connectors.models import DocumentFailure
 from onyx.connectors.models import EntityFailure
 from onyx.connectors.models import SlimDocument
 from onyx.connectors.models import TextSection
+from onyx.connectors.slack.onyx_retry_handler import OnyxRedisSlackRetryHandler
 from onyx.connectors.slack.utils import expert_info_from_slack_id
 from onyx.connectors.slack.utils import get_message_link
 from onyx.connectors.slack.utils import make_paginated_slack_api_call_w_retries
 from onyx.connectors.slack.utils import make_slack_api_call_w_retries
 from onyx.connectors.slack.utils import SlackTextCleaner
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
+from onyx.redis.redis_pool import get_redis_client
 from onyx.utils.logger import setup_logger

-
 logger = setup_logger()

 _SLACK_LIMIT = 900
@@ -493,9 +498,13 @@ def _process_message(
        )


-class SlackConnector(SlimConnector, CheckpointConnector[SlackCheckpoint]):
+class SlackConnector(
+    SlimConnector, CredentialsConnector, CheckpointConnector[SlackCheckpoint]
+):
    FAST_TIMEOUT = 1

+    MAX_RETRIES = 7  # arbitrarily selected
+
    def __init__(
        self,
        channels: list[str] | None = None,
@@ -514,16 +523,49 @@ class SlackConnector(SlimConnector, CheckpointConnector[SlackCheckpoint]):
        # just used for efficiency
        self.text_cleaner: SlackTextCleaner | None = None
        self.user_cache: dict[str, BasicExpertInfo | None] = {}
+        self.credentials_provider: CredentialsProviderInterface | None = None
+        self.credential_prefix: str | None = None
+        self.delay_lock: str | None = None  # the redis key for the shared lock
+        self.delay_key: str | None = None  # the redis key for the shared delay

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        raise NotImplementedError("Use set_credentials_provider with this connector.")
+
+    def set_credentials_provider(
+        self, credentials_provider: CredentialsProviderInterface
+    ) -> None:
+        credentials = credentials_provider.get_credentials()
+        tenant_id = credentials_provider.get_tenant_id()
+        self.redis = get_redis_client(tenant_id=tenant_id)
+
+        self.credential_prefix = (
+            f"connector:slack:credential_{credentials_provider.get_provider_key()}"
+        )
+        self.delay_lock = f"{self.credential_prefix}:delay_lock"
+        self.delay_key = f"{self.credential_prefix}:delay"
+
+        # NOTE: slack has a built in RateLimitErrorRetryHandler, but it isn't designed
+        # for concurrent workers. We've extended it with OnyxRedisSlackRetryHandler.
+        connection_error_retry_handler = ConnectionErrorRetryHandler()
+        onyx_rate_limit_error_retry_handler = OnyxRedisSlackRetryHandler(
+            max_retry_count=self.MAX_RETRIES,
+            delay_lock=self.delay_lock,
+            delay_key=self.delay_key,
+            r=self.redis,
+        )
+        custom_retry_handlers: list[RetryHandler] = [
+            connection_error_retry_handler,
+            onyx_rate_limit_error_retry_handler,
+        ]
+
        bot_token = credentials["slack_bot_token"]
-        self.client = WebClient(token=bot_token)
+        self.client = WebClient(token=bot_token, retry_handlers=custom_retry_handlers)
        # use for requests that must return quickly (e.g. realtime flows where user is waiting)
        self.fast_client = WebClient(
            token=bot_token, timeout=SlackConnector.FAST_TIMEOUT
        )
        self.text_cleaner = SlackTextCleaner(client=self.client)
-        return None
+        self.credentials_provider = credentials_provider

    def retrieve_all_slim_documents(
        self,
--- a/backend/onyx/connectors/slack/onyx_retry_handler.py
+++ b/backend/onyx/connectors/slack/onyx_retry_handler.py
@@ -0,0 +1,159 @@
+import math
+import random
+import time
+from typing import cast
+from typing import Optional
+
+from redis import Redis
+from redis.lock import Lock as RedisLock
+from slack_sdk.http_retry.handler import RetryHandler
+from slack_sdk.http_retry.request import HttpRequest
+from slack_sdk.http_retry.response import HttpResponse
+from slack_sdk.http_retry.state import RetryState
+
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class OnyxRedisSlackRetryHandler(RetryHandler):
+    """
+    This class uses Redis to share a rate limit among multiple threads.
+
+    Threads that encounter a rate limit will observe the shared delay, increment the
+    shared delay with the retry value, and use the new shared value as a wait interval.
+
+    This has the effect of serializing calls when a rate limit is hit, which is what
+    needs to happens if the server punishes us with additional limiting when we make
+    a call too early. We believe this is what Slack is doing based on empirical
+    observation, meaning we see indefinite hangs if we're too aggressive.
+
+    Another way to do this is just to do exponential backoff. Might be easier?
+
+    Adapted from slack's RateLimitErrorRetryHandler.
+    """
+
+    LOCK_TTL = 60  # used to serialize access to the retry TTL
+    LOCK_BLOCKING_TIMEOUT = 60  # how long to wait for the lock
+
+    """RetryHandler that does retries for rate limited errors."""
+
+    def __init__(
+        self,
+        max_retry_count: int,
+        delay_lock: str,
+        delay_key: str,
+        r: Redis,
+    ):
+        """
+        delay_lock: the redis key to use with RedisLock (to synchronize access to delay_key)
+        delay_key: the redis key containing a shared TTL
+        """
+        super().__init__(max_retry_count=max_retry_count)
+        self._redis: Redis = r
+        self._delay_lock = delay_lock
+        self._delay_key = delay_key
+
+    def _can_retry(
+        self,
+        *,
+        state: RetryState,
+        request: HttpRequest,
+        response: Optional[HttpResponse] = None,
+        error: Optional[Exception] = None,
+    ) -> bool:
+        return response is not None and response.status_code == 429
+
+    def prepare_for_next_attempt(
+        self,
+        *,
+        state: RetryState,
+        request: HttpRequest,
+        response: Optional[HttpResponse] = None,
+        error: Optional[Exception] = None,
+    ) -> None:
+        """It seems this function is responsible for the wait to retry ... aka we
+        actually sleep in this function."""
+        retry_after_value: list[str] | None = None
+        retry_after_header_name: Optional[str] = None
+        duration_s: float = 1.0  # seconds
+
+        if response is None:
+            # NOTE(rkuo): this logic comes from RateLimitErrorRetryHandler.
+            # This reads oddly, as if the caller itself could raise the exception.
+            # We don't have the luxury of changing this.
+            if error:
+                raise error
+
+            return
+
+        state.next_attempt_requested = True  # this signals the caller to retry
+
+        # calculate wait duration based on retry-after + some jitter
+        for k in response.headers.keys():
+            if k.lower() == "retry-after":
+                retry_after_header_name = k
+                break
+
+        try:
+            if retry_after_header_name is None:
+                # This situation usually does not arise. Just in case.
+                raise ValueError(
+                    "OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header name is None"
+                )
+
+            retry_after_value = response.headers.get(retry_after_header_name)
+            if not retry_after_value:
+                raise ValueError(
+                    "OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header value is None"
+                )
+
+            retry_after_value_int = int(
+                retry_after_value[0]
+            )  # will raise ValueError if somehow we can't convert to int
+            jitter = retry_after_value_int * 0.25 * random.random()
+            duration_s = math.ceil(retry_after_value_int + jitter)
+        except ValueError:
+            duration_s += random.random()
+
+        # lock and extend the ttl
+        lock: RedisLock = self._redis.lock(
+            self._delay_lock,
+            timeout=OnyxRedisSlackRetryHandler.LOCK_TTL,
+            thread_local=False,
+        )
+
+        acquired = lock.acquire(
+            blocking_timeout=OnyxRedisSlackRetryHandler.LOCK_BLOCKING_TIMEOUT / 2
+        )
+
+        ttl_ms: int | None = None
+
+        try:
+            if acquired:
+                # if we can get the lock, then read and extend the ttl
+                ttl_ms = cast(int, self._redis.pttl(self._delay_key))
+                if ttl_ms < 0:  # negative values are error status codes ... see docs
+                    ttl_ms = 0
+                ttl_ms_new = ttl_ms + int(duration_s * 1000.0)
+                self._redis.set(self._delay_key, "1", px=ttl_ms_new)
+            else:
+                # if we can't get the lock, just go ahead.
+                # TODO: if we know our actual parallelism, multiplying by that
+                # would be a pretty good idea
+                ttl_ms_new = int(duration_s * 1000.0)
+        finally:
+            if acquired:
+                lock.release()
+
+        logger.warning(
+            f"OnyxRedisSlackRetryHandler.prepare_for_next_attempt wait: "
+            f"retry-after={retry_after_value} "
+            f"shared_delay_ms={ttl_ms} new_shared_delay_ms={ttl_ms_new}"
+        )
+
+        # TODO: would be good to take an event var and sleep in short increments to
+        # allow for a clean exit / exception
+        time.sleep(ttl_ms_new / 1000.0)
+
+        state.increment_current_attempt()
--- a/backend/onyx/connectors/slack/utils.py
+++ b/backend/onyx/connectors/slack/utils.py
@@ -1,5 +1,4 @@
 import re
-import time
 from collections.abc import Callable
 from collections.abc import Generator
 from functools import lru_cache
@@ -64,71 +63,72 @@ def _make_slack_api_call_paginated(
    return paginated_call


-def make_slack_api_rate_limited(
-    call: Callable[..., SlackResponse], max_retries: int = 7
-) -> Callable[..., SlackResponse]:
-    """Wraps calls to slack API so that they automatically handle rate limiting"""
+# NOTE(rkuo): we may not need this any more if the integrated retry handlers work as
+# expected.  Do we want to keep this around?

-    @wraps(call)
-    def rate_limited_call(**kwargs: Any) -> SlackResponse:
-        last_exception = None
+# def make_slack_api_rate_limited(
+#     call: Callable[..., SlackResponse], max_retries: int = 7
+# ) -> Callable[..., SlackResponse]:
+#     """Wraps calls to slack API so that they automatically handle rate limiting"""

-        for _ in range(max_retries):
-            try:
-                # Make the API call
-                response = call(**kwargs)
+#     @wraps(call)
+#     def rate_limited_call(**kwargs: Any) -> SlackResponse:
+#         last_exception = None

-                # Check for errors in the response, will raise `SlackApiError`
-                # if anything went wrong
-                response.validate()
-                return response
+#         for _ in range(max_retries):
+#             try:
+#                 # Make the API call
+#                 response = call(**kwargs)

-            except SlackApiError as e:
-                last_exception = e
-                try:
-                    error = e.response["error"]
-                except KeyError:
-                    error = "unknown error"
+#                 # Check for errors in the response, will raise `SlackApiError`
+#                 # if anything went wrong
+#                 response.validate()
+#                 return response

-                if error == "ratelimited":
-                    # Handle rate limiting: get the 'Retry-After' header value and sleep for that duration
-                    retry_after = int(e.response.headers.get("Retry-After", 1))
-                    logger.info(
-                        f"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}"
-                    )
-                    time.sleep(retry_after)
-                elif error in ["already_reacted", "no_reaction", "internal_error"]:
-                    # Log internal_error and return the response instead of failing
-                    logger.warning(
-                        f"Slack call encountered '{error}', skipping and continuing..."
-                    )
-                    return e.response
-                else:
-                    # Raise the error for non-transient errors
-                    raise
+#             except SlackApiError as e:
+#                 last_exception = e
+#                 try:
+#                     error = e.response["error"]
+#                 except KeyError:
+#                     error = "unknown error"

-        # If the code reaches this point, all retries have been exhausted
-        msg = f"Max retries ({max_retries}) exceeded"
-        if last_exception:
-            raise Exception(msg) from last_exception
-        else:
-            raise Exception(msg)
+#                 if error == "ratelimited":
+#                     # Handle rate limiting: get the 'Retry-After' header value and sleep for that duration
+#                     retry_after = int(e.response.headers.get("Retry-After", 1))
+#                     logger.info(
+#                         f"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}"
+#                     )
+#                     time.sleep(retry_after)
+#                 elif error in ["already_reacted", "no_reaction", "internal_error"]:
+#                     # Log internal_error and return the response instead of failing
+#                     logger.warning(
+#                         f"Slack call encountered '{error}', skipping and continuing..."
+#                     )
+#                     return e.response
+#                 else:
+#                     # Raise the error for non-transient errors
+#                     raise

-    return rate_limited_call
+#         # If the code reaches this point, all retries have been exhausted
+#         msg = f"Max retries ({max_retries}) exceeded"
+#         if last_exception:
+#             raise Exception(msg) from last_exception
+#         else:
+#             raise Exception(msg)
+
+#     return rate_limited_call


 def make_slack_api_call_w_retries(
    call: Callable[..., SlackResponse], **kwargs: Any
 ) -> SlackResponse:
-    return basic_retry_wrapper(make_slack_api_rate_limited(call))(**kwargs)
+    return basic_retry_wrapper(call)(**kwargs)


 def make_paginated_slack_api_call_w_retries(
    call: Callable[..., SlackResponse], **kwargs: Any
 ) -> Generator[dict[str, Any], None, None]:
-    return _make_slack_api_call_paginated(
-        basic_retry_wrapper(make_slack_api_rate_limited(call))
-    )(**kwargs)
+    return _make_slack_api_call_paginated(basic_retry_wrapper(call))(**kwargs)


 def expert_info_from_slack_id(
@@ -142,7 +142,7 @@ def expert_info_from_slack_id(
    if user_id in user_cache:
        return user_cache[user_id]

-    response = make_slack_api_rate_limited(client.users_info)(user=user_id)
+    response = client.users_info(user=user_id)

    if not response["ok"]:
        user_cache[user_id] = None
@@ -175,9 +175,7 @@ class SlackTextCleaner:
    def _get_slack_name(self, user_id: str) -> str:
        if user_id not in self._id_to_name_map:
            try:
-                response = make_slack_api_rate_limited(self._client.users_info)(
-                    user=user_id
-                )
+                response = self._client.users_info(user=user_id)
                # prefer display name if set, since that is what is shown in Slack
                self._id_to_name_map[user_id] = (
                    response["user"]["profile"]["display_name"]
--- a/backend/onyx/onyxbot/slack/listener.py
+++ b/backend/onyx/onyxbot/slack/listener.py
@@ -42,6 +42,7 @@ from onyx.context.search.retrieval.search_runner import (
 from onyx.db.engine import get_all_tenant_ids
 from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import SqlEngine
 from onyx.db.models import SlackBot
 from onyx.db.search_settings import get_current_search_settings
 from onyx.db.slack_bot import fetch_slack_bot
@@ -972,6 +973,9 @@ def _get_socket_client(


 if __name__ == "__main__":
+    # Initialize the SqlEngine
+    SqlEngine.init_engine(pool_size=20, max_overflow=5)
+
    # Initialize the tenant handler which will manage tenant connections
    logger.info("Starting SlackbotHandler")
    tenant_handler = SlackbotHandler()
--- a/backend/onyx/redis/redis_connector_doc_perm_sync.py
+++ b/backend/onyx/redis/redis_connector_doc_perm_sync.py
@@ -195,7 +195,7 @@ class RedisConnectorPermissionSync:
                ),
                queue=OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT,
                task_id=custom_task_id,
-                priority=OnyxCeleryPriority.HIGH,
+                priority=OnyxCeleryPriority.MEDIUM,
                ignore_result=True,
            )
            async_results.append(result)
--- a/backend/onyx/redis/redis_pool.py
+++ b/backend/onyx/redis/redis_pool.py
@@ -125,6 +125,7 @@ class TenantRedis(redis.Redis):
            "hset",
            "hdel",
            "ttl",
+            "pttl",
        ]  # Regular methods that need simple prefixing

        if item == "scan_iter" or item == "sscan_iter":
--- a/backend/tests/daily/connectors/highspot/test_highspot_connector.py
+++ b/backend/tests/daily/connectors/highspot/test_highspot_connector.py
@@ -2,6 +2,8 @@ import json
 import os
 import time
 from pathlib import Path
+from unittest.mock import MagicMock
+from unittest.mock import patch

 import pytest

@@ -40,10 +42,13 @@ def highspot_connector() -> HighspotConnector:
    return connector


-@pytest.mark.xfail(
-    reason="Accessing postgres that isn't available in connector only tests",
+@patch(
+    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
+    return_value=None,
 )
-def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None:
+def test_highspot_connector_basic(
+    mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
+) -> None:
    """Test basic functionality of the Highspot connector."""
    all_docs: list[Document] = []
    test_data = load_test_data()
@@ -76,10 +81,13 @@ def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None
        assert len(section.text) > 0


-@pytest.mark.xfail(
-    reason="Possibly accessing postgres that isn't available in connector only tests",
+@patch(
+    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
+    return_value=None,
 )
-def test_highspot_connector_slim(highspot_connector: HighspotConnector) -> None:
+def test_highspot_connector_slim(
+    mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
+) -> None:
    """Test slim document retrieval."""
    # Get all doc IDs from the full connector
    all_full_doc_ids = set()
--- a/web/package-lock.json
+++ b/web/package-lock.json
--- a/web/src/app/admin/assistants/AssistantEditor.tsx
+++ b/web/src/app/admin/assistants/AssistantEditor.tsx
@@ -469,8 +469,14 @@ export function AssistantEditor({
            description: Yup.string().required(
              "Must provide a description for the Assistant"
            ),
-            system_prompt: Yup.string(),
-            task_prompt: Yup.string(),
+            system_prompt: Yup.string().max(
+              8000,
+              "Instructions must be less than 8000 characters"
+            ),
+            task_prompt: Yup.string().max(
+              8000,
+              "Reminders must be less than 8000 characters"
+            ),
            is_public: Yup.boolean().required(),
            document_set_ids: Yup.array().of(Yup.number()),
            num_chunks: Yup.number().nullable(),
--- a/web/src/app/admin/embeddings/RerankingFormPage.tsx
+++ b/web/src/app/admin/embeddings/RerankingFormPage.tsx
@@ -76,7 +76,7 @@ const RerankingDetailsForm = forwardRef<
          function (value) {
            const { rerank_provider_type } = this.parent;
            return (
-              rerank_provider_type !== RerankerProvider.COHERE ||
+              rerank_provider_type === RerankerProvider.LITELLM ||
              (value !== null && value !== "")
            );
          }
@@ -457,7 +457,7 @@ const RerankingDetailsForm = forwardRef<
                            ...values,
                            rerank_api_key: value,
                          });
-                          setFieldValue("api_key", value);
+                          setFieldValue("rerank_api_key", value);
                        }}
                        type="password"
                        label={
Author	SHA1	Message	Date
pablonyx	e823919892	fix	2025-04-01 11:27:58 -07:00
pablonyx	2f3020a4d3	Update migration (#4410 )	2025-04-01 09:10:24 -07:00
SubashMohan	4bae1318bb	refactor tests for Highspot connector to use mocking for API key retrieval (#4346 )	2025-04-01 02:39:05 +00:00
Weves	11c3f44c76	Init engine in slackbot	2025-03-31 17:04:20 -07:00
rkuo-danswer	cb38ac8a97	also set permission upsert to medium priority (#4405 ) Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>	2025-03-31 14:59:31 -07:00
pablonyx	b2120b9f39	add user files (#4152 )	2025-03-31 21:06:59 +00:00
rkuo-danswer	ccd372cc4a	Bugfix/slack rate limiting (#4386 ) * use slack's built in rate limit handler for the bot * WIP * fix the slack rate limit handler * change default to 8 * cleanup * try catch int conversion just in case * linearize this logic better * code review comments --------- Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>	2025-03-31 21:00:26 +00:00
evan-danswer	ea30f1de1e	minor improvement to fireflies connector (#4383 ) * minor improvement to fireflies connector * reduce time diff	2025-03-31 20:00:52 +00:00
evan-danswer	a7130681d9	ensure bedrock model contains API key (#4396 ) * ensure bedrock model contains API key * fix storing bug	2025-03-31 19:58:53 +00:00