k

Replace Redis migration lock with a PostgreSQL advisory lock (https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS)
2026-02-26 20:25:46 +00:00 · 2025-03-11 16:03:02 -07:00 · 2025-03-11 16:02:10 -07:00 · 2025-03-11 15:59:17 -07:00 · 2025-03-11 15:49:31 -07:00 · 2025-03-06 09:39:18 -08:00
25 changed files with 524 additions and 415 deletions
--- a/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
+++ b/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
@@ -1,125 +0,0 @@
-"""Update GitHub connector repo_name to repositories
-
-Revision ID: 3934b1bc7b62
-Revises: b7c2b63c4a03
-Create Date: 2025-03-05 10:50:30.516962
-
-"""
-from alembic import op
-import sqlalchemy as sa
-import json
-import logging
-
-# revision identifiers, used by Alembic.
-revision = "3934b1bc7b62"
-down_revision = "b7c2b63c4a03"
-branch_labels = None
-depends_on = None
-
-logger = logging.getLogger("alembic.runtime.migration")
-
-
-def upgrade() -> None:
-    # Get all GitHub connectors
-    conn = op.get_bind()
-
-    # First get all GitHub connectors
-    github_connectors = conn.execute(
-        sa.text(
-            """
-            SELECT id, connector_specific_config
-            FROM connector
-            WHERE source = 'GITHUB'
-            """
-        )
-    ).fetchall()
-
-    # Update each connector's config
-    updated_count = 0
-    for connector_id, config in github_connectors:
-        try:
-            if not config:
-                logger.warning(f"Connector {connector_id} has no config, skipping")
-                continue
-
-            # Parse the config if it's a string
-            if isinstance(config, str):
-                config = json.loads(config)
-
-            if "repo_name" not in config:
-                continue
-
-            # Create new config with repositories instead of repo_name
-            new_config = dict(config)
-            repo_name_value = new_config.pop("repo_name")
-            new_config["repositories"] = repo_name_value
-
-            # Update the connector with the new config
-            conn.execute(
-                sa.text(
-                    """
-                    UPDATE connector
-                    SET connector_specific_config = :new_config
-                    WHERE id = :connector_id
-                    """
-                ),
-                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
-            )
-            updated_count += 1
-        except Exception as e:
-            logger.error(f"Error updating connector {connector_id}: {str(e)}")
-
-
-def downgrade() -> None:
-    # Get all GitHub connectors
-    conn = op.get_bind()
-
-    logger.debug(
-        "Starting rollback of GitHub connectors from repositories to repo_name"
-    )
-
-    github_connectors = conn.execute(
-        sa.text(
-            """
-            SELECT id, connector_specific_config
-            FROM connector
-            WHERE source = 'GITHUB'
-            """
-        )
-    ).fetchall()
-
-    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")
-
-    # Revert each GitHub connector to use repo_name instead of repositories
-    reverted_count = 0
-    for connector_id, config in github_connectors:
-        try:
-            if not config:
-                continue
-
-            # Parse the config if it's a string
-            if isinstance(config, str):
-                config = json.loads(config)
-
-            if "repositories" not in config:
-                continue
-
-            # Create new config with repo_name instead of repositories
-            new_config = dict(config)
-            repositories_value = new_config.pop("repositories")
-            new_config["repo_name"] = repositories_value
-
-            # Update the connector with the new config
-            conn.execute(
-                sa.text(
-                    """
-                    UPDATE connector
-                    SET connector_specific_config = :new_config
-                    WHERE id = :connector_id
-                    """
-                ),
-                {"new_config": json.dumps(new_config), "connector_id": connector_id},
-            )
-            reverted_count += 1
-        except Exception as e:
-            logger.error(f"Error reverting connector {connector_id}: {str(e)}")
--- a/backend/alembic/versions/3bd4c84fe72f_improved_index.py
+++ b/backend/alembic/versions/3bd4c84fe72f_improved_index.py
@@ -6,7 +6,13 @@ Create Date: 2025-02-26 13:07:56.217791

 """
 from alembic import op
+import time
+import hashlib
+from sqlalchemy import text

+# The Redis-based lock was replaced by a PostgreSQL advisory lock; the old imports are kept for reference:
+# from onyx.redis.redis_pool import get_redis_client
+# from onyx.configs.app_configs import ALEMBIC_MIGRATION_LOCK_KEY

 # revision identifiers, used by Alembic.
 revision = "3bd4c84fe72f"
@@ -14,6 +20,12 @@ down_revision = "8f43500ee275"
 branch_labels = None
 depends_on = None

+# Key for the PostgreSQL advisory lock that guards this migration.
+# The first 15 hex digits (60 bits) of an MD5 digest are used so the value fits in a bigint.
+ALEMBIC_MIGRATION_LOCK_KEY = int(
+    hashlib.md5("alembic_migration_lock".encode()).hexdigest()[:15], 16
+)
+

 # NOTE:
 # This migration addresses issues with the previous migration (8f43500ee275) which caused
@@ -28,45 +40,412 @@ depends_on = None


 def upgrade() -> None:
-    # Create a GIN index for full-text search on chat_message.message
-    op.execute(
-        """
-        ALTER TABLE chat_message
-        ADD COLUMN message_tsv tsvector
-        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
-        """
-    )
+    # Use PostgreSQL advisory locks to ensure only one migration runs at a time
+    connection = op.get_bind()

-    # Commit the current transaction before creating concurrent indexes
-    op.execute("COMMIT")
+    # Try to acquire an advisory lock (exclusive, session level)
+    lock_acquired = connection.execute(
+        text("SELECT pg_try_advisory_lock(:lock_key)").bindparams(
+            lock_key=ALEMBIC_MIGRATION_LOCK_KEY
+        )
+    ).scalar()

-    op.execute(
-        """
-        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
-        ON chat_message
-        USING GIN (message_tsv)
-        """
-    )
+    if not lock_acquired:
+        raise Exception(
+            "Migration already in progress by another process. Try again later."
+        )

-    # Also add a stored tsvector column for chat_session.description
-    op.execute(
-        """
-        ALTER TABLE chat_session
-        ADD COLUMN description_tsv tsvector
-        GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
-        """
-    )
+    try:
+        # --- PART 1: chat_message table ---
+        # Step 0: Drop columns, triggers and functions left over from earlier runs (both tables)
+        op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv_gen")
+        op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
+        op.execute("DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message")
+        op.execute("DROP FUNCTION IF EXISTS update_chat_message_tsv()")
+        op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
+        # Drop chat_session tsv trigger if it exists
+        op.execute("DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session")
+        op.execute("DROP FUNCTION IF EXISTS update_chat_session_tsv()")
+        op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS title_tsv")

-    # Commit again before creating the second concurrent index
-    op.execute("COMMIT")
+        # Drop all indexes that will be created later (using CONCURRENTLY to avoid locking)
+        op.execute("COMMIT")  # Required for CONCURRENTLY
+        op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv")
+        op.execute("COMMIT")
+        op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv_gen")
+        op.execute("COMMIT")
+        op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv")
+        op.execute("COMMIT")
+        op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv_gen")
+        op.execute("COMMIT")
+        op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_message_lower")
+        op.execute("COMMIT")

-    op.execute(
-        """
-        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
-        ON chat_session
-        USING GIN (description_tsv)
-        """
-    )
+        # Drop any column on chat_session that will be created
+        op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv")
+        op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv_gen")
+
+        # Begin a new transaction before continuing
+        op.execute("BEGIN")
+
+        time.time()
+        op.execute(
+            "ALTER TABLE chat_message ADD COLUMN IF NOT EXISTS message_tsv tsvector"
+        )
+
+        # Step 2: Create function and trigger for new/updated rows
+        op.execute(
+            """
+    CREATE OR REPLACE FUNCTION update_chat_message_tsv()
+    RETURNS TRIGGER AS $$
+    BEGIN
+      NEW.message_tsv = to_tsvector('english', NEW.message);
+      RETURN NEW;
+    END;
+    $$ LANGUAGE plpgsql
+    """
+        )
+
+        # Create trigger in a separate execute call
+        op.execute(
+            """
+    CREATE TRIGGER chat_message_tsv_trigger
+    BEFORE INSERT OR UPDATE ON chat_message
+    FOR EACH ROW EXECUTE FUNCTION update_chat_message_tsv()
+    """
+        )
+
+        # Step 3: Update existing rows in batches using Python
+        time.time()
+
+        # Get connection and count total rows
+        connection = op.get_bind()
+        total_count_result = connection.execute(
+            text("SELECT COUNT(*) FROM chat_message")
+        ).scalar()
+        total_count = total_count_result if total_count_result is not None else 0
+        batch_size = 5000
+        batches = 0
+
+        # Calculate total batches needed
+        total_batches = (
+            (total_count + batch_size - 1) // batch_size if total_count > 0 else 0
+        )
+
+        # Backfill in batches of batch_size rows, picking each batch with LIMIT/OFFSET over rows still NULL
+        for batch_num in range(total_batches):
+            offset = batch_num * batch_size
+
+            # NOTE: updated rows leave the `message_tsv IS NULL` set, so the growing OFFSET skips rows; the final UPDATE below catches them
+            connection.execute(
+                text(
+                    """
+            UPDATE chat_message
+            SET message_tsv = to_tsvector('english', message)
+            WHERE id IN (
+                SELECT id FROM chat_message
+                WHERE message_tsv IS NULL
+                ORDER BY id
+                LIMIT :batch_size OFFSET :offset
+            )
+            """
+                ).bindparams(batch_size=batch_size, offset=offset)
+            )
+
+            # Commit each batch
+            connection.execute(text("COMMIT"))
+            # Start a new transaction
+            connection.execute(text("BEGIN"))
+
+            batches += 1
+
+        # Final check for any remaining NULL values
+        connection.execute(
+            text(
+                """
+    UPDATE chat_message SET message_tsv = to_tsvector('english', message)
+    WHERE message_tsv IS NULL
+    """
+            )
+        )
+
+        # Create GIN index concurrently
+        connection.execute(text("COMMIT"))
+
+        time.time()
+
+        connection.execute(
+            text(
+                """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
+    ON chat_message USING GIN (message_tsv)
+    """
+            )
+        )
+
+        # First drop the trigger as it won't be needed anymore
+        connection.execute(
+            text(
+                """
+    DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message;
+    """
+            )
+        )
+
+        connection.execute(
+            text(
+                """
+    DROP FUNCTION IF EXISTS update_chat_message_tsv();
+    """
+            )
+        )
+
+        # Add new generated column
+        time.time()
+        connection.execute(
+            text(
+                """
+    ALTER TABLE chat_message
+    ADD COLUMN message_tsv_gen tsvector
+    GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
+    """
+            )
+        )
+
+        connection.execute(text("COMMIT"))
+
+        time.time()
+
+        connection.execute(
+            text(
+                """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv_gen
+    ON chat_message USING GIN (message_tsv_gen)
+    """
+            )
+        )
+
+        # Drop old index and column
+        connection.execute(text("COMMIT"))
+
+        connection.execute(
+            text(
+                """
+    DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;
+    """
+            )
+        )
+        connection.execute(text("COMMIT"))
+        connection.execute(
+            text(
+                """
+    ALTER TABLE chat_message DROP COLUMN message_tsv;
+    """
+            )
+        )
+
+        # Rename new column to old name
+        connection.execute(
+            text(
+                """
+    ALTER TABLE chat_message RENAME COLUMN message_tsv_gen TO message_tsv;
+    """
+            )
+        )
+
+        # --- PART 2: chat_session table ---
+
+        # Step 1: Add nullable column (quick, minimal locking)
+        time.time()
+        connection.execute(
+            text(
+                "ALTER TABLE chat_session ADD COLUMN IF NOT EXISTS description_tsv tsvector"
+            )
+        )
+
+        # Step 2: Create function and trigger for new/updated rows - SPLIT INTO SEPARATE CALLS
+        connection.execute(
+            text(
+                """
+    CREATE OR REPLACE FUNCTION update_chat_session_tsv()
+    RETURNS TRIGGER AS $$
+    BEGIN
+      NEW.description_tsv = to_tsvector('english', COALESCE(NEW.description, ''));
+      RETURN NEW;
+    END;
+    $$ LANGUAGE plpgsql
+    """
+            )
+        )
+
+        # Create trigger in a separate execute call
+        connection.execute(
+            text(
+                """
+    CREATE TRIGGER chat_session_tsv_trigger
+    BEFORE INSERT OR UPDATE ON chat_session
+    FOR EACH ROW EXECUTE FUNCTION update_chat_session_tsv()
+    """
+            )
+        )
+
+        # Step 3: Update existing rows in batches using Python
+        time.time()
+
+        # NOTE: this MAX(id) lookup is unused; batching is driven by the ID list fetched below
+        # (id is cast to text for the MAX function since it's a UUID)
+        max_id_result = connection.execute(
+            text("SELECT COALESCE(MAX(id::text), '0') FROM chat_session")
+        ).scalar()
+        max_id_result if max_id_result is not None else "0"
+        batch_size = 5000
+        batches = 0
+
+        # Get all IDs ordered to process in batches
+        rows = connection.execute(
+            text("SELECT id FROM chat_session ORDER BY id")
+        ).fetchall()
+        total_rows = len(rows)
+
+        # Process in batches
+        for batch_num, batch_start in enumerate(range(0, total_rows, batch_size)):
+            batch_end = min(batch_start + batch_size, total_rows)
+            batch_ids = [row[0] for row in rows[batch_start:batch_end]]
+
+            if not batch_ids:
+                continue
+
+            # Use IN clause instead of BETWEEN for UUIDs
+            placeholders = ", ".join([f":id{i}" for i in range(len(batch_ids))])
+            params = {f"id{i}": id_val for i, id_val in enumerate(batch_ids)}
+
+            # Execute update for this batch
+            connection.execute(
+                text(
+                    f"""
+            UPDATE chat_session
+            SET description_tsv = to_tsvector('english', COALESCE(description, ''))
+            WHERE id IN ({placeholders})
+            AND description_tsv IS NULL
+            """
+                ).bindparams(**params)
+            )
+
+            # Commit each batch
+            connection.execute(text("COMMIT"))
+            # Start a new transaction
+            connection.execute(text("BEGIN"))
+
+            batches += 1
+
+        # Final check for any remaining NULL values
+        connection.execute(
+            text(
+                """
+    UPDATE chat_session SET description_tsv = to_tsvector('english', COALESCE(description, ''))
+    WHERE description_tsv IS NULL
+    """
+            )
+        )
+
+        # Create GIN index concurrently
+        connection.execute(text("COMMIT"))
+
+        time.time()
+        connection.execute(
+            text(
+                """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
+    ON chat_session USING GIN (description_tsv)
+    """
+            )
+        )
+
+        # The chat_session backfill is complete at this point
+        # Drop the trigger first, then its function; neither is needed anymore
+        connection.execute(
+            text(
+                """
+    DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session;
+    """
+            )
+        )
+
+        connection.execute(
+            text(
+                """
+    DROP FUNCTION IF EXISTS update_chat_session_tsv();
+    """
+            )
+        )
+        # Add new generated column
+        time.time()
+        connection.execute(
+            text(
+                """
+    ALTER TABLE chat_session
+    ADD COLUMN description_tsv_gen tsvector
+    GENERATED ALWAYS AS (to_tsvector('english', COALESCE(description, ''))) STORED;
+    """
+            )
+        )
+
+        # Create new index on generated column
+        connection.execute(text("COMMIT"))
+
+        time.time()
+        connection.execute(
+            text(
+                """
+    CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv_gen
+    ON chat_session USING GIN (description_tsv_gen)
+    """
+            )
+        )
+
+        # Drop old index and column
+        connection.execute(text("COMMIT"))
+
+        connection.execute(
+            text(
+                """
+    DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;
+    """
+            )
+        )
+        connection.execute(text("COMMIT"))
+        connection.execute(
+            text(
+                """
+    ALTER TABLE chat_session DROP COLUMN description_tsv;
+    """
+            )
+        )
+
+        # Rename new column to old name
+        connection.execute(
+            text(
+                """
+    ALTER TABLE chat_session RENAME COLUMN description_tsv_gen TO description_tsv;
+    """
+            )
+        )
+
+    except Exception as e:
+        # Release the lock on error (NOTE: the finally block below unlocks again, so this call is redundant)
+        connection.execute(
+            text("SELECT pg_advisory_unlock(:lock_key)").bindparams(
+                lock_key=ALEMBIC_MIGRATION_LOCK_KEY
+            )
+        )
+        raise e
+    finally:
+        # Release the advisory lock when done
+        connection.execute(
+            text("SELECT pg_advisory_unlock(:lock_key)").bindparams(
+                lock_key=ALEMBIC_MIGRATION_LOCK_KEY
+            )
+        )


 def downgrade() -> None:
--- a/backend/alembic_upgrade_retries.log
+++ b/backend/alembic_upgrade_retries.log
--- a/backend/asdf.py
+++ b/backend/asdf.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+"""
+Simple script that keeps trying to run 'alembic upgrade head' until it succeeds.
+"""
+import subprocess
+import sys
+import time
+
+# Path to alembic.ini (change this if needed)
+ALEMBIC_CONFIG = "alembic.ini"
+
+# Time to wait between attempts (in seconds); 0 means retry immediately with no back-off
+WAIT_TIME = 0
+
+print("Starting continuous alembic upgrade attempts")
+print(f"Using config: {ALEMBIC_CONFIG}")
+print(f"Will retry every {WAIT_TIME} seconds until successful")
+
+attempt = 1
+
+while True:
+    print(f"\nAttempt #{attempt} to run alembic upgrade head")
+
+    try:
+        # Run the alembic upgrade head command
+        result = subprocess.run(
+            ["alembic", "-c", ALEMBIC_CONFIG, "upgrade", "head"],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+
+        # If we get here, the command was successful
+        print("SUCCESS! Alembic upgrade completed successfully.")
+        print(f"Output: {result.stdout}")
+        sys.exit(0)
+
+    except subprocess.CalledProcessError as e:
+        # Command failed, print error and try again
+        print(f"FAILED with return code {e.returncode}")
+        print(f"Error output: {e.stderr}")
+
+    print(f"Waiting {WAIT_TIME} seconds before next attempt...")
+    time.sleep(WAIT_TIME)
+    attempt += 1
--- a/backend/ee/onyx/server/oauth/confluence_cloud.py
+++ b/backend/ee/onyx/server/oauth/confluence_cloud.py
@@ -80,7 +80,6 @@ class ConfluenceCloudOAuth:
        "search:confluence%20"
        # granular scope
        "read:attachment:confluence%20"  # possibly unneeded unless calling v2 attachments api
-        "read:content-details:confluence%20"  # for permission sync
        "offline_access"
    )

--- a/backend/ee/onyx/server/tenants/product_gating.py
+++ b/backend/ee/onyx/server/tenants/product_gating.py
@@ -48,5 +48,4 @@ def store_product_gating(tenant_id: str, application_status: ApplicationStatus)

 def get_gated_tenants() -> set[str]:
    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
-    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
-    return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
+    return cast(set[str], redis_client.smembers(GATED_TENANTS_KEY))
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -55,11 +55,7 @@ logger = logging.getLogger(__name__)
 async def get_or_provision_tenant(
    email: str, referral_source: str | None = None, request: Request | None = None
 ) -> str:
-    """
-    Get existing tenant ID for an email or create a new tenant if none exists.
-    This function should only be called after we have verified we want this user's tenant to exist.
-    It returns the tenant ID associated with the email, creating a new tenant if necessary.
-    """
+    """Get existing tenant ID for an email or create a new tenant if none exists."""
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA

--- a/backend/onyx/auth/users.py
+++ b/backend/onyx/auth/users.py
@@ -587,20 +587,14 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    ) -> Optional[User]:
        email = credentials.username

-        tenant_id: str | None = None
-        try:
-            tenant_id = fetch_ee_implementation_or_noop(
-                "onyx.server.tenants.provisioning",
-                "get_tenant_id_for_email",
-                None,
-            )(
-                email=email,
-            )
-        except Exception as e:
-            logger.warning(
-                f"User attempted to login with invalid credentials: {str(e)}"
-            )
-
+        # Resolve tenant_id for this email; the EE get_or_provision_tenant creates a tenant if none exists
+        tenant_id = await fetch_ee_implementation_or_noop(
+            "onyx.server.tenants.provisioning",
+            "get_or_provision_tenant",
+            async_return_default_schema,
+        )(
+            email=email,
+        )
        if not tenant_id:
            # User not found in mapping
            self.password_helper.hash(credentials.password)
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -643,3 +643,7 @@ MOCK_LLM_RESPONSE = (


 DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB = 20
+
+ALEMBIC_MIGRATION_LOCK_KEY = os.environ.get(
+    "ALEMBIC_MIGRATION_LOCK_KEY", "alembic_migration_lock"
+)
--- a/backend/onyx/connectors/confluence/connector.py
+++ b/backend/onyx/connectors/confluence/connector.py
@@ -240,7 +240,7 @@ class ConfluenceConnector(
            # Extract basic page information
            page_id = page["id"]
            page_title = page["title"]
-            page_url = f"{self.wiki_base}{page['_links']['webui']}"
+            page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"

            # Get the page content
            page_content = extract_text_from_confluence_html(
--- a/backend/onyx/connectors/github/connector.py
+++ b/backend/onyx/connectors/github/connector.py
@@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        repo_owner: str,
-        repositories: str | None = None,
+        repo_name: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        state_filter: str = "all",
        include_prs: bool = True,
        include_issues: bool = False,
    ) -> None:
        self.repo_owner = repo_owner
-        self.repositories = repositories
+        self.repo_name = repo_name
        self.batch_size = batch_size
        self.state_filter = state_filter
        self.include_prs = include_prs
@@ -157,42 +157,11 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
-            return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
+            return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
        except RateLimitExceededException:
            _sleep_after_rate_limit_exception(github_client)
            return self._get_github_repo(github_client, attempt_num + 1)

-    def _get_github_repos(
-        self, github_client: Github, attempt_num: int = 0
-    ) -> list[Repository.Repository]:
-        """Get specific repositories based on comma-separated repo_name string."""
-        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
-            raise RuntimeError(
-                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
-            )
-
-        try:
-            repos = []
-            # Split repo_name by comma and strip whitespace
-            repo_names = [
-                name.strip() for name in (cast(str, self.repositories)).split(",")
-            ]
-
-            for repo_name in repo_names:
-                if repo_name:  # Skip empty strings
-                    try:
-                        repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
-                        repos.append(repo)
-                    except GithubException as e:
-                        logger.warning(
-                            f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
-                        )
-
-            return repos
-        except RateLimitExceededException:
-            _sleep_after_rate_limit_exception(github_client)
-            return self._get_github_repos(github_client, attempt_num + 1)
-
    def _get_all_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
@@ -220,17 +189,11 @@ class GithubConnector(LoadConnector, PollConnector):
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub")

-        repos = []
-        if self.repositories:
-            if "," in self.repositories:
-                # Multiple repositories specified
-                repos = self._get_github_repos(self.github_client)
-            else:
-                # Single repository (backward compatibility)
-                repos = [self._get_github_repo(self.github_client)]
-        else:
-            # All repositories
-            repos = self._get_all_repos(self.github_client)
+        repos = (
+            [self._get_github_repo(self.github_client)]
+            if self.repo_name
+            else self._get_all_repos(self.github_client)
+        )

        for repo in repos:
            if self.include_prs:
@@ -305,48 +268,11 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
-            if self.repositories:
-                if "," in self.repositories:
-                    # Multiple repositories specified
-                    repo_names = [name.strip() for name in self.repositories.split(",")]
-                    if not repo_names:
-                        raise ConnectorValidationError(
-                            "Invalid connector settings: No valid repository names provided."
-                        )
-
-                    # Validate at least one repository exists and is accessible
-                    valid_repos = False
-                    validation_errors = []
-
-                    for repo_name in repo_names:
-                        if not repo_name:
-                            continue
-
-                        try:
-                            test_repo = self.github_client.get_repo(
-                                f"{self.repo_owner}/{repo_name}"
-                            )
-                            test_repo.get_contents("")
-                            valid_repos = True
-                            # If at least one repo is valid, we can proceed
-                            break
-                        except GithubException as e:
-                            validation_errors.append(
-                                f"Repository '{repo_name}': {e.data.get('message', str(e))}"
-                            )
-
-                    if not valid_repos:
-                        error_msg = (
-                            "None of the specified repositories could be accessed: "
-                        )
-                        error_msg += ", ".join(validation_errors)
-                        raise ConnectorValidationError(error_msg)
-                else:
-                    # Single repository (backward compatibility)
-                    test_repo = self.github_client.get_repo(
-                        f"{self.repo_owner}/{self.repositories}"
-                    )
-                    test_repo.get_contents("")
+            if self.repo_name:
+                test_repo = self.github_client.get_repo(
+                    f"{self.repo_owner}/{self.repo_name}"
+                )
+                test_repo.get_contents("")
            else:
                # Try to get organization first
                try:
@@ -372,15 +298,10 @@ class GithubConnector(LoadConnector, PollConnector):
                    "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
                )
            elif e.status == 404:
-                if self.repositories:
-                    if "," in self.repositories:
-                        raise ConnectorValidationError(
-                            f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
-                        )
-                    else:
-                        raise ConnectorValidationError(
-                            f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
-                        )
+                if self.repo_name:
+                    raise ConnectorValidationError(
+                        f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
+                    )
                else:
                    raise ConnectorValidationError(
                        f"GitHub user or organization not found: {self.repo_owner}"
@@ -389,7 +310,6 @@ class GithubConnector(LoadConnector, PollConnector):
                raise ConnectorValidationError(
                    f"Unexpected GitHub error (status={e.status}): {e.data}"
                )
-
        except Exception as exc:
            raise Exception(
                f"Unexpected error during GitHub settings validation: {exc}"
@@ -401,7 +321,7 @@ if __name__ == "__main__":

    connector = GithubConnector(
        repo_owner=os.environ["REPO_OWNER"],
-        repositories=os.environ["REPOSITORIES"],
+        repo_name=os.environ["REPO_NAME"],
    )
    connector.load_credentials(
        {"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -464,29 +464,12 @@ def index_doc_batch(
            ),
        )

-        all_returned_doc_ids = (
-            {record.document_id for record in insertion_records}
-            .union(
-                {
-                    record.failed_document.document_id
-                    for record in vector_db_write_failures
-                    if record.failed_document
-                }
-            )
-            .union(
-                {
-                    record.failed_document.document_id
-                    for record in embedding_failures
-                    if record.failed_document
-                }
-            )
-        )
-        if all_returned_doc_ids != set(updatable_ids):
+        successful_doc_ids = {record.document_id for record in insertion_records}
+        if successful_doc_ids != set(updatable_ids):
            raise RuntimeError(
                f"Some documents were not successfully indexed. "
                f"Updatable IDs: {updatable_ids}, "
-                f"Returned IDs: {all_returned_doc_ids}. "
-                "This should never happen."
+                f"Successful IDs: {successful_doc_ids}"
            )

        last_modified_ids = []
--- a/backend/tests/daily/connectors/confluence/test_confluence_basic.py
+++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py
@@ -45,7 +45,7 @@ def test_confluence_connector_basic(
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

-    assert len(doc_batch) == 2
+    assert len(doc_batch) == 3

    page_within_a_page_doc: Document | None = None
    page_doc: Document | None = None
--- a/deployment/README.md
+++ b/deployment/README.md
@@ -80,13 +80,3 @@ prod cluster**
   - `kubectl delete -f .`
   - To not delete the persistent volumes (Document indexes and Users), specify the specific `.yaml` files instead of
     `.` without specifying delete on persistent-volumes.yaml.
-
-### Using Helm to deploy to an existing cluster
-
-Onyx has a helm chart that is convenient to install all services to an existing Kubernetes cluster. To install:
-
-* The Helm chart is not currently published, so clone the repo to install it.
-* Configure access to the cluster via kubectl. Ensure the kubectl context is set to the cluster that you want to use
-* The default secrets, environment variables, and other service-level configuration are stored in `deployment/helm/charts/onyx/values.yaml`. You may create an `override.yaml` file to override these defaults.
-* `cd deployment/helm/charts/onyx` and run `helm install onyx -n onyx -f override.yaml .`. This will install onyx on the cluster under the `onyx` namespace.
-* Check the status of the deploy using `kubectl get pods -n onyx`
--- a/deployment/helm/charts/onyx/templates/ingress-api.yaml
+++ b/deployment/helm/charts/onyx/templates/ingress-api.yaml
@@ -1,27 +0,0 @@
-{{- if .Values.ingress.enabled -}}
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
-  name: {{ include "onyx-stack.fullname" . }}-ingress-api
-  annotations:
-    kubernetes.io/ingress.class: nginx
-    nginx.ingress.kubernetes.io/rewrite-target: /$2
-    nginx.ingress.kubernetes.io/use-regex: "true"
-    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
-spec:
-  rules:
-    - host: {{ .Values.ingress.api.host }}
-      http:
-        paths:
-          - path: /api(/|$)(.*)
-            pathType: Prefix
-            backend:
-              service:
-                name: {{ include "onyx-stack.fullname" . }}-api-service
-                port:
-                  number: {{ .Values.api.service.servicePort }}
-  tls:
-    - hosts:
-        - {{ .Values.ingress.api.host }}
-      secretName: {{ include "onyx-stack.fullname" . }}-ingress-api-tls
-{{- end }}
--- a/deployment/helm/charts/onyx/templates/ingress-webserver.yaml
+++ b/deployment/helm/charts/onyx/templates/ingress-webserver.yaml
@@ -1,26 +0,0 @@
-{{- if .Values.ingress.enabled -}}
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
-  name: {{ include "onyx-stack.fullname" . }}-ingress-webserver
-  annotations:
-    kubernetes.io/ingress.class: nginx
-    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
-    kubernetes.io/tls-acme: "true"
-spec:
-  rules:
-    - host: {{ .Values.ingress.webserver.host }}
-      http:
-        paths:
-          - path: /
-            pathType: Prefix
-            backend:
-              service:
-                name: {{ include "onyx-stack.fullname" . }}-webserver
-                port:
-                  number: {{ .Values.webserver.service.servicePort }}
-  tls:
-    - hosts:
-        - {{ .Values.ingress.webserver.host }}
-      secretName: {{ include "onyx-stack.fullname" . }}-ingress-webserver-tls
-{{- end }}
--- a/deployment/helm/charts/onyx/templates/lets-encrypt.yaml
+++ b/deployment/helm/charts/onyx/templates/lets-encrypt.yaml
@@ -1,20 +0,0 @@
-{{- if .Values.letsencrypt.enabled -}}
-apiVersion: cert-manager.io/v1
-kind: ClusterIssuer
-metadata:
-  name: {{ include "onyx-stack.fullname" . }}-letsencrypt
-spec:
-  acme:
-    # The ACME server URL
-    server: https://acme-v02.api.letsencrypt.org/directory
-    # Email address used for ACME registration
-    email: {{ .Values.letsencrypt.email }}
-    # Name of a secret used to store the ACME account private key
-    privateKeySecretRef:
-      name: {{ include "onyx-stack.fullname" . }}-letsencrypt
-    # Enable the HTTP-01 challenge provider
-    solvers:
-      - http01:
-          ingress:
-            class: nginx
-{{- end }}
--- a/deployment/helm/charts/onyx/values.yaml
+++ b/deployment/helm/charts/onyx/values.yaml
@@ -376,17 +376,22 @@ redis:
    existingSecret: onyx-secrets
    existingSecretPasswordKey: redis_password

-ingress:
-  enabled: false
-  className: ""
-  api:
-    host: onyx.local
-  webserver:
-    host: onyx.local
+# ingress:
+#  enabled: false
+#  className: ""
+#  annotations: {}
+#    # kubernetes.io/ingress.class: nginx
+#    # kubernetes.io/tls-acme: "true"
+#  hosts:
+#    - host: chart-example.local
+#      paths:
+#        - path: /
+#          pathType: ImplementationSpecific
+#  tls: []
+#  #  - secretName: chart-example-tls
+#  #    hosts:
+#  #      - chart-example.local

-letsencrypt:
-  enabled: false
-  email: "abc@abc.com"

 auth:
  # existingSecret onyx-secret for storing smtp, oauth, slack, and other secrets
--- a/web/src/app/auth/login/EmailPasswordForm.tsx
+++ b/web/src/app/auth/login/EmailPasswordForm.tsx
@@ -61,7 +61,6 @@ export function EmailPasswordForm({

            if (!response.ok) {
              setIsWorking(false);
-
              const errorDetail = (await response.json()).detail;
              let errorMsg = "Unknown error";
              if (typeof errorDetail === "object" && errorDetail.reason) {
@@ -97,13 +96,12 @@ export function EmailPasswordForm({
          } else {
            setIsWorking(false);
            const errorDetail = (await loginResponse.json()).detail;
+
            let errorMsg = "Unknown error";
            if (errorDetail === "LOGIN_BAD_CREDENTIALS") {
              errorMsg = "Invalid email or password";
            } else if (errorDetail === "NO_WEB_LOGIN_AND_HAS_NO_PASSWORD") {
              errorMsg = "Create an account to set a password";
-            } else if (typeof errorDetail === "string") {
-              errorMsg = errorDetail;
            }
            if (loginResponse.status === 429) {
              errorMsg = "Too many requests. Please try again later.";
--- a/web/src/app/chat/folders/FolderDropdown.tsx
+++ b/web/src/app/chat/folders/FolderDropdown.tsx
@@ -191,7 +191,6 @@ export const FolderDropdown = forwardRef<HTMLDivElement, FolderDropdownProps>(
                    onChange={(e) => setNewFolderName(e.target.value)}
                    className="text-sm font-medium bg-transparent outline-none w-full pb-1 border-b border-background-500 transition-colors duration-200"
                    onKeyDown={(e) => {
-                      e.stopPropagation();
                      if (e.key === "Enter") {
                        handleEdit();
                      }
--- a/web/src/app/chat/folders/FolderList.tsx
+++ b/web/src/app/chat/folders/FolderList.tsx
@@ -303,6 +303,7 @@ const FolderItem = ({
              key={chatSession.id}
              chatSession={chatSession}
              isSelected={chatSession.id === currentChatId}
+              skipGradient={isDragOver}
              showShareModal={showShareModal}
              showDeleteModal={showDeleteModal}
            />
--- a/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx
+++ b/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx
@@ -32,17 +32,21 @@ export function ChatSessionDisplay({
  chatSession,
  search,
  isSelected,
+  skipGradient,
  closeSidebar,
  showShareModal,
  showDeleteModal,
+  foldersExisting,
  isDragging,
 }: {
  chatSession: ChatSession;
  isSelected: boolean;
  search?: boolean;
+  skipGradient?: boolean;
  closeSidebar?: () => void;
  showShareModal?: (chatSession: ChatSession) => void;
  showDeleteModal?: (chatSession: ChatSession) => void;
+  foldersExisting?: boolean;
  isDragging?: boolean;
 }) {
  const router = useRouter();
@@ -234,12 +238,8 @@ export function ChatSessionDisplay({
                          e.preventDefault();
                          e.stopPropagation();
                        }}
-                        onChange={(e) => {
-                          setChatName(e.target.value);
-                        }}
+                        onChange={(e) => setChatName(e.target.value)}
                        onKeyDown={(event) => {
-                          event.stopPropagation();
-
                          if (event.key === "Enter") {
                            onRename();
                            event.preventDefault();
--- a/web/src/app/chat/sessionSidebar/PagesTab.tsx
+++ b/web/src/app/chat/sessionSidebar/PagesTab.tsx
@@ -264,6 +264,7 @@ export function PagesTab({
        >
          <ChatSessionDisplay
            chatSession={chat}
+            foldersExisting={foldersExisting}
            isSelected={currentChatId === chat.id}
            showShareModal={showShareModal}
            showDeleteModal={showDeleteModal}
--- a/web/src/components/admin/connectors/ConnectorTitle.tsx
+++ b/web/src/components/admin/connectors/ConnectorTitle.tsx
@@ -40,12 +40,8 @@ export const ConnectorTitle = ({
    const typedConnector = connector as Connector<GithubConfig>;
    additionalMetadata.set(
      "Repo",
-      typedConnector.connector_specific_config.repositories
-        ? `${typedConnector.connector_specific_config.repo_owner}/${
-            typedConnector.connector_specific_config.repositories.includes(",")
-              ? "multiple repos"
-              : typedConnector.connector_specific_config.repositories
-          }`
+      typedConnector.connector_specific_config.repo_name
+        ? `${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
        : `${typedConnector.connector_specific_config.repo_owner}/*`
    );
  } else if (connector.source === "gitlab") {
--- a/web/src/lib/connectors/connectors.tsx
+++ b/web/src/lib/connectors/connectors.tsx
@@ -190,12 +190,10 @@ export const connectorConfigs: Record<
            fields: [
              {
                type: "text",
-                query: "Enter the repository name(s):",
-                label: "Repository Name(s)",
-                name: "repositories",
+                query: "Enter the repository name:",
+                label: "Repository Name",
+                name: "repo_name",
                optional: false,
-                description:
-                  "For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
              },
            ],
          },
@@ -1360,7 +1358,7 @@ export interface WebConfig {

 export interface GithubConfig {
  repo_owner: string;
-  repositories: string; // Comma-separated list of repository names
+  repo_name: string;
  include_prs: boolean;
  include_issues: boolean;
 }
Author	SHA1	Message	Date
pablonyx	02322b8567	k	2025-03-11 16:03:02 -07:00
pablonyx	fef08ffdc6	k	2025-03-11 16:02:10 -07:00
pablonyx	168d77a3d7	redis -> pg advisory lock (https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS )	2025-03-11 15:59:17 -07:00
pablonyx	b077de1449	improved safety	2025-03-11 15:49:31 -07:00
pablonyx	307c07b12e	k	2025-03-06 09:39:18 -08:00