k

2026-02-17 07:45:47 +00:00 · 2025-03-05 15:27:14 -08:00
6 changed files with 381 additions and 84 deletions
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -12,40 +12,29 @@ env:
  BUILDKIT_PROGRESS: plain

 jobs:
-
-#   Bypassing this for now as the idea of not building is glitching
-#   releases and builds that depends on everything being tagged in docker
-#   1) Preliminary job to check if the changed files are relevant
-#   check_model_server_changes:
-#     runs-on: ubuntu-latest
-#     outputs:
-#       changed: ${{ steps.check.outputs.changed }}
-#     steps:
-#       - name: Checkout code
-#         uses: actions/checkout@v4
-# 
-#       - name: Check if relevant files changed
-#         id: check
-#         run: |
-#           # Default to "false"
-#           echo "changed=false" >> $GITHUB_OUTPUT
-# 
-#           # Compare the previous commit (github.event.before) to the current one (github.sha)
-#           # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
-#           # set changed=true
-#           if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
-#              | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
-#             echo "changed=true" >> $GITHUB_OUTPUT
-#           fi
-
+  # 1) Preliminary job to check if the changed files are relevant
  check_model_server_changes:
    runs-on: ubuntu-latest
    outputs:
-      changed: "true"
+      changed: ${{ steps.check.outputs.changed }}
    steps:
-      - name: Bypass check and set output
-        run: echo "changed=true" >> $GITHUB_ENV
-        
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Check if relevant files changed
+        id: check
+        run: |
+          # Default to "false"
+          echo "changed=false" >> $GITHUB_OUTPUT
+
+          # Compare the previous commit (github.event.before) to the current one (github.sha)
+          # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
+          # set changed=true
+          if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
+             | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
+            echo "changed=true" >> $GITHUB_OUTPUT
+          fi
+
  build-amd64:
    needs: [check_model_server_changes]
    if: needs.check_model_server_changes.outputs.changed == 'true'
--- a/backend/alembic/versions/3bd4c84fe72f_improved_index.py
+++ b/backend/alembic/versions/3bd4c84fe72f_improved_index.py
@@ -6,7 +6,8 @@ Create Date: 2025-02-26 13:07:56.217791

 """
 from alembic import op
-
+import time
+from sqlalchemy import text

 # revision identifiers, used by Alembic.
 revision = "3bd4c84fe72f"
@@ -27,45 +28,357 @@ depends_on = None
 # 4. Adds indexes to both chat_message and chat_session tables for comprehensive search


-def upgrade() -> None:
-    # Create a GIN index for full-text search on chat_message.message
+def upgrade():
+    # --- PART 1: chat_message table ---
+    # Step 1: Add nullable column (quick, minimal locking)
+    # op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
+    # op.execute("DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message")
+    # op.execute("DROP FUNCTION IF EXISTS update_chat_message_tsv()")
+    # op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
+    # # Drop chat_session tsv trigger if it exists
+    # op.execute("DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session")
+    # op.execute("DROP FUNCTION IF EXISTS update_chat_session_tsv()")
+    # op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS title_tsv")
+    # raise Exception("Stop here")
+    time.time()
+    op.execute("ALTER TABLE chat_message ADD COLUMN IF NOT EXISTS message_tsv tsvector")
+
+    # Step 2: Create function and trigger for new/updated rows
    op.execute(
        """
-        ALTER TABLE chat_message
-        ADD COLUMN message_tsv tsvector
-        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
-        """
+    CREATE OR REPLACE FUNCTION update_chat_message_tsv()
+    RETURNS TRIGGER AS $$
+    BEGIN
+      NEW.message_tsv = to_tsvector('english', NEW.message);
+      RETURN NEW;
+    END;
+    $$ LANGUAGE plpgsql
+    """
    )

-    # Commit the current transaction before creating concurrent indexes
-    op.execute("COMMIT")
-
+    # Create trigger in a separate execute call
    op.execute(
        """
-        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
-        ON chat_message
-        USING GIN (message_tsv)
-        """
+    CREATE TRIGGER chat_message_tsv_trigger
+    BEFORE INSERT OR UPDATE ON chat_message
+    FOR EACH ROW EXECUTE FUNCTION update_chat_message_tsv()
+    """
    )

-    # Also add a stored tsvector column for chat_session.description
-    op.execute(
-        """
-        ALTER TABLE chat_session
-        ADD COLUMN description_tsv tsvector
-        GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
-        """
+    # Step 3: Update existing rows in batches using Python
+    time.time()
+
+    # Get connection and count total rows
+    connection = op.get_bind()
+    total_count_result = connection.execute(
+        text("SELECT COUNT(*) FROM chat_message")
+    ).scalar()
+    total_count = total_count_result if total_count_result is not None else 0
+    batch_size = 5000
+    batches = 0
+
+    # Calculate total batches needed
+    total_batches = (
+        (total_count + batch_size - 1) // batch_size if total_count > 0 else 0
    )

-    # Commit again before creating the second concurrent index
-    op.execute("COMMIT")
+    # Process in batches - properly handling UUIDs by using OFFSET/LIMIT approach
+    for batch_num in range(total_batches):
+        offset = batch_num * batch_size

-    op.execute(
-        """
-        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
-        ON chat_session
-        USING GIN (description_tsv)
-        """
+        # Execute update for this batch using OFFSET/LIMIT which works with UUIDs
+        connection.execute(
+            text(
+                """
+            UPDATE chat_message
+            SET message_tsv = to_tsvector('english', message)
+            WHERE id IN (
+                SELECT id FROM chat_message
+                WHERE message_tsv IS NULL
+                ORDER BY id
+                LIMIT :batch_size OFFSET :offset
+            )
+            """
+            ).bindparams(batch_size=batch_size, offset=offset)
+        )
+
+        # Commit each batch
+        connection.execute(text("COMMIT"))
+        # Start a new transaction
+        connection.execute(text("BEGIN"))
+
+        batches += 1
+
+    # Final check for any remaining NULL values
+    connection.execute(
+        text(
+            """
+    UPDATE chat_message SET message_tsv = to_tsvector('english', message)
+    WHERE message_tsv IS NULL
+    """
+        )
+    )
+
+    # Create GIN index concurrently
+    connection.execute(text("COMMIT"))
+
+    time.time()
+
+    connection.execute(
+        text(
+            """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
+    ON chat_message USING GIN (message_tsv)
+    """
+        )
+    )
+
+    # First drop the trigger as it won't be needed anymore
+    connection.execute(
+        text(
+            """
+    DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message;
+    """
+        )
+    )
+
+    connection.execute(
+        text(
+            """
+    DROP FUNCTION IF EXISTS update_chat_message_tsv();
+    """
+        )
+    )
+
+    # Add new generated column
+    time.time()
+    connection.execute(
+        text(
+            """
+    ALTER TABLE chat_message
+    ADD COLUMN message_tsv_gen tsvector
+    GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
+    """
+        )
+    )
+
+    connection.execute(text("COMMIT"))
+
+    time.time()
+
+    connection.execute(
+        text(
+            """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv_gen
+    ON chat_message USING GIN (message_tsv_gen)
+    """
+        )
+    )
+
+    # Drop old index and column
+    connection.execute(text("COMMIT"))
+
+    connection.execute(
+        text(
+            """
+    DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;
+    """
+        )
+    )
+    connection.execute(text("COMMIT"))
+    connection.execute(
+        text(
+            """
+    ALTER TABLE chat_message DROP COLUMN message_tsv;
+    """
+        )
+    )
+
+    # Rename new column to old name
+    connection.execute(
+        text(
+            """
+    ALTER TABLE chat_message RENAME COLUMN message_tsv_gen TO message_tsv;
+    """
+        )
+    )
+
+    # --- PART 2: chat_session table ---
+
+    # Step 1: Add nullable column (quick, minimal locking)
+    time.time()
+    connection.execute(
+        text(
+            "ALTER TABLE chat_session ADD COLUMN IF NOT EXISTS description_tsv tsvector"
+        )
+    )
+
+    # Step 2: Create function and trigger for new/updated rows - SPLIT INTO SEPARATE CALLS
+    connection.execute(
+        text(
+            """
+    CREATE OR REPLACE FUNCTION update_chat_session_tsv()
+    RETURNS TRIGGER AS $$
+    BEGIN
+      NEW.description_tsv = to_tsvector('english', COALESCE(NEW.description, ''));
+      RETURN NEW;
+    END;
+    $$ LANGUAGE plpgsql
+    """
+        )
+    )
+
+    # Create trigger in a separate execute call
+    connection.execute(
+        text(
+            """
+    CREATE TRIGGER chat_session_tsv_trigger
+    BEFORE INSERT OR UPDATE ON chat_session
+    FOR EACH ROW EXECUTE FUNCTION update_chat_session_tsv()
+    """
+        )
+    )
+
+    # Step 3: Update existing rows in batches using Python
+    time.time()
+
+    # Get the maximum ID to determine batch count
+    # Cast id to text for MAX function since it's a UUID
+    max_id_result = connection.execute(
+        text("SELECT COALESCE(MAX(id::text), '0') FROM chat_session")
+    ).scalar()
+    max_id_result if max_id_result is not None else "0"
+    batch_size = 5000
+    batches = 0
+
+    # Get all IDs ordered to process in batches
+    rows = connection.execute(
+        text("SELECT id FROM chat_session ORDER BY id")
+    ).fetchall()
+    total_rows = len(rows)
+
+    # Process in batches
+    for batch_num, batch_start in enumerate(range(0, total_rows, batch_size)):
+        batch_end = min(batch_start + batch_size, total_rows)
+        batch_ids = [row[0] for row in rows[batch_start:batch_end]]
+
+        if not batch_ids:
+            continue
+
+        # Use IN clause instead of BETWEEN for UUIDs
+        placeholders = ", ".join([f":id{i}" for i in range(len(batch_ids))])
+        params = {f"id{i}": id_val for i, id_val in enumerate(batch_ids)}
+
+        # Execute update for this batch
+        connection.execute(
+            text(
+                f"""
+            UPDATE chat_session
+            SET description_tsv = to_tsvector('english', COALESCE(description, ''))
+            WHERE id IN ({placeholders})
+            AND description_tsv IS NULL
+            """
+            ).bindparams(**params)
+        )
+
+        # Commit each batch
+        connection.execute(text("COMMIT"))
+        # Start a new transaction
+        connection.execute(text("BEGIN"))
+
+        batches += 1
+
+    # Final check for any remaining NULL values
+    connection.execute(
+        text(
+            """
+    UPDATE chat_session SET description_tsv = to_tsvector('english', COALESCE(description, ''))
+    WHERE description_tsv IS NULL
+    """
+        )
+    )
+
+    # Create GIN index concurrently
+    connection.execute(text("COMMIT"))
+
+    time.time()
+    connection.execute(
+        text(
+            """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
+    ON chat_session USING GIN (description_tsv)
+    """
+        )
+    )
+
+    # After Final check for chat_session
+    # First drop the trigger as it won't be needed anymore
+    connection.execute(
+        text(
+            """
+    DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session;
+    """
+        )
+    )
+
+    connection.execute(
+        text(
+            """
+    DROP FUNCTION IF EXISTS update_chat_session_tsv();
+    """
+        )
+    )
+    # Add new generated column
+    time.time()
+    connection.execute(
+        text(
+            """
+    ALTER TABLE chat_session
+    ADD COLUMN description_tsv_gen tsvector
+    GENERATED ALWAYS AS (to_tsvector('english', COALESCE(description, ''))) STORED;
+    """
+        )
+    )
+
+    # Create new index on generated column
+    connection.execute(text("COMMIT"))
+
+    time.time()
+    connection.execute(
+        text(
+            """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv_gen
+    ON chat_session USING GIN (description_tsv_gen)
+    """
+        )
+    )
+
+    # Drop old index and column
+    connection.execute(text("COMMIT"))
+
+    connection.execute(
+        text(
+            """
+    DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;
+    """
+        )
+    )
+    connection.execute(text("COMMIT"))
+    connection.execute(
+        text(
+            """
+    ALTER TABLE chat_session DROP COLUMN description_tsv;
+    """
+        )
+    )
+
+    # Rename new column to old name
+    connection.execute(
+        text(
+            """
+    ALTER TABLE chat_session RENAME COLUMN description_tsv_gen TO description_tsv;
+    """
+        )
    )


--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -15,7 +15,7 @@ from ee.onyx.server.enterprise_settings.api import (
 )
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
 from ee.onyx.server.middleware.tenant_tracking import add_tenant_id_middleware
-from ee.onyx.server.oauth.api import router as ee_oauth_router
+from ee.onyx.server.oauth.api import router as oauth_router
 from ee.onyx.server.query_and_chat.chat_backend import (
    router as chat_router,
 )
@@ -128,7 +128,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
-    include_router_with_global_prefix_prepended(application, ee_oauth_router)
+    include_router_with_global_prefix_prepended(application, oauth_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
--- a/backend/onyx/connectors/slack/connector.py
+++ b/backend/onyx/connectors/slack/connector.py
@@ -674,7 +674,7 @@ class SlackConnector(SlimConnector, CheckpointConnector):
        """
        1. Verify the bot token is valid for the workspace (via auth_test).
        2. Ensure the bot has enough scope to list channels.
-        3. Check that every channel specified in self.channels exists (only when regex is not enabled).
+        3. Check that every channel specified in self.channels exists.
        """
        if self.client is None:
            raise ConnectorMissingCredentialError("Slack credentials not loaded.")
@@ -706,8 +706,8 @@ class SlackConnector(SlimConnector, CheckpointConnector):
                    f"Slack API returned a failure: {error_msg}"
                )

-            # 3) If channels are specified and regex is not enabled, verify each is accessible
-            if self.channels and not self.channel_regex_enabled:
+            # 3) If channels are specified, verify each is accessible
+            if self.channels:
                accessible_channels = get_channels(
                    client=self.client,
                    exclude_archived=True,
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -51,7 +51,6 @@ from onyx.server.documents.cc_pair import router as cc_pair_router
 from onyx.server.documents.connector import router as connector_router
 from onyx.server.documents.credential import router as credential_router
 from onyx.server.documents.document import router as document_router
-from onyx.server.documents.standard_oauth import router as standard_oauth_router
 from onyx.server.features.document_set.api import router as document_set_router
 from onyx.server.features.folder.api import router as folder_router
 from onyx.server.features.input_prompt.api import (
@@ -323,7 +322,6 @@ def get_application() -> FastAPI:
    )
    include_router_with_global_prefix_prepended(application, long_term_logs_router)
    include_router_with_global_prefix_prepended(application, api_key_router)
-    include_router_with_global_prefix_prepended(application, standard_oauth_router)

    if AUTH_TYPE == AuthType.DISABLED:
        # Server logs this during auth setup verification step
--- a/web/src/app/admin/settings/SettingsForm.tsx
+++ b/web/src/app/admin/settings/SettingsForm.tsx
@@ -290,24 +290,21 @@ export function SettingsForm() {
            id="chatRetentionInput"
            placeholder="Infinite Retention"
          />
-          <div className="mr-auto flex gap-2">
-            <Button
-              onClick={handleSetChatRetention}
-              variant="submit"
-              size="sm"
-              className="mr-auto"
-            >
-              Set Retention Limit
-            </Button>
-            <Button
-              onClick={handleClearChatRetention}
-              variant="default"
-              size="sm"
-              className="mr-auto"
-            >
-              Retain All
-            </Button>
-          </div>
+          <Button
+            onClick={handleSetChatRetention}
+            variant="submit"
+            size="sm"
+            className="mr-3"
+          >
+            Set Retention Limit
+          </Button>
+          <Button
+            onClick={handleClearChatRetention}
+            variant="default"
+            size="sm"
+          >
+            Retain All
+          </Button>
        </>
      )}