Compare commits

1 commit

Author: pablonyx | SHA1: 9ece8f68f3 | Message: k | Date: 2025-03-03 18:18:47 -08:00
91 changed files with 1290 additions and 4551 deletions


@@ -6,8 +6,7 @@ Create Date: 2025-02-26 13:07:56.217791
"""
from alembic import op
import time
from sqlalchemy import text
# revision identifiers, used by Alembic.
revision = "3bd4c84fe72f"
@@ -28,357 +27,45 @@ depends_on = None
# 4. Adds indexes to both chat_message and chat_session tables for comprehensive search
def upgrade():
# --- PART 1: chat_message table ---
# Step 1: Add nullable column (quick, minimal locking)
# op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
# op.execute("DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message")
# op.execute("DROP FUNCTION IF EXISTS update_chat_message_tsv()")
# op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
# # Drop chat_session tsv trigger if it exists
# op.execute("DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session")
# op.execute("DROP FUNCTION IF EXISTS update_chat_session_tsv()")
# op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS title_tsv")
# raise Exception("Stop here")
time.time()
op.execute("ALTER TABLE chat_message ADD COLUMN IF NOT EXISTS message_tsv tsvector")
# Step 2: Create function and trigger for new/updated rows
def upgrade() -> None:
# Create a GIN index for full-text search on chat_message.message
op.execute(
"""
CREATE OR REPLACE FUNCTION update_chat_message_tsv()
RETURNS TRIGGER AS $$
BEGIN
NEW.message_tsv = to_tsvector('english', NEW.message);
RETURN NEW;
END;
$$ LANGUAGE plpgsql
"""
ALTER TABLE chat_message
ADD COLUMN message_tsv tsvector
GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
"""
)
# Create trigger in a separate execute call
# Commit the current transaction before creating concurrent indexes
op.execute("COMMIT")
op.execute(
"""
CREATE TRIGGER chat_message_tsv_trigger
BEFORE INSERT OR UPDATE ON chat_message
FOR EACH ROW EXECUTE FUNCTION update_chat_message_tsv()
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
ON chat_message
USING GIN (message_tsv)
"""
)
# Step 3: Update existing rows in batches using Python
time.time()
# Get connection and count total rows
connection = op.get_bind()
total_count_result = connection.execute(
text("SELECT COUNT(*) FROM chat_message")
).scalar()
total_count = total_count_result if total_count_result is not None else 0
batch_size = 5000
batches = 0
# Calculate total batches needed
total_batches = (
(total_count + batch_size - 1) // batch_size if total_count > 0 else 0
# Also add a stored tsvector column for chat_session.description
op.execute(
"""
ALTER TABLE chat_session
ADD COLUMN description_tsv tsvector
GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
"""
)
# Process in batches - properly handling UUIDs by using OFFSET/LIMIT approach
for batch_num in range(total_batches):
offset = batch_num * batch_size
# Commit again before creating the second concurrent index
op.execute("COMMIT")
# Execute update for this batch using OFFSET/LIMIT which works with UUIDs
connection.execute(
text(
"""
UPDATE chat_message
SET message_tsv = to_tsvector('english', message)
WHERE id IN (
SELECT id FROM chat_message
WHERE message_tsv IS NULL
ORDER BY id
LIMIT :batch_size OFFSET :offset
)
"""
).bindparams(batch_size=batch_size, offset=offset)
)
# Commit each batch
connection.execute(text("COMMIT"))
# Start a new transaction
connection.execute(text("BEGIN"))
batches += 1
# Final check for any remaining NULL values
connection.execute(
text(
"""
UPDATE chat_message SET message_tsv = to_tsvector('english', message)
WHERE message_tsv IS NULL
"""
)
)
# Create GIN index concurrently
connection.execute(text("COMMIT"))
time.time()
connection.execute(
text(
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
ON chat_message USING GIN (message_tsv)
"""
)
)
# First drop the trigger as it won't be needed anymore
connection.execute(
text(
"""
DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message;
"""
)
)
connection.execute(
text(
"""
DROP FUNCTION IF EXISTS update_chat_message_tsv();
"""
)
)
# Add new generated column
time.time()
connection.execute(
text(
"""
ALTER TABLE chat_message
ADD COLUMN message_tsv_gen tsvector
GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
"""
)
)
connection.execute(text("COMMIT"))
time.time()
connection.execute(
text(
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv_gen
ON chat_message USING GIN (message_tsv_gen)
"""
)
)
# Drop old index and column
connection.execute(text("COMMIT"))
connection.execute(
text(
"""
DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;
"""
)
)
connection.execute(text("COMMIT"))
connection.execute(
text(
"""
ALTER TABLE chat_message DROP COLUMN message_tsv;
"""
)
)
# Rename new column to old name
connection.execute(
text(
"""
ALTER TABLE chat_message RENAME COLUMN message_tsv_gen TO message_tsv;
"""
)
)
# --- PART 2: chat_session table ---
# Step 1: Add nullable column (quick, minimal locking)
time.time()
connection.execute(
text(
"ALTER TABLE chat_session ADD COLUMN IF NOT EXISTS description_tsv tsvector"
)
)
# Step 2: Create function and trigger for new/updated rows - SPLIT INTO SEPARATE CALLS
connection.execute(
text(
"""
CREATE OR REPLACE FUNCTION update_chat_session_tsv()
RETURNS TRIGGER AS $$
BEGIN
NEW.description_tsv = to_tsvector('english', COALESCE(NEW.description, ''));
RETURN NEW;
END;
$$ LANGUAGE plpgsql
"""
)
)
# Create trigger in a separate execute call
connection.execute(
text(
"""
CREATE TRIGGER chat_session_tsv_trigger
BEFORE INSERT OR UPDATE ON chat_session
FOR EACH ROW EXECUTE FUNCTION update_chat_session_tsv()
"""
)
)
# Step 3: Update existing rows in batches using Python
time.time()
# Get the maximum ID to determine batch count
# Cast id to text for MAX function since it's a UUID
max_id_result = connection.execute(
text("SELECT COALESCE(MAX(id::text), '0') FROM chat_session")
).scalar()
max_id_result if max_id_result is not None else "0"
batch_size = 5000
batches = 0
# Get all IDs ordered to process in batches
rows = connection.execute(
text("SELECT id FROM chat_session ORDER BY id")
).fetchall()
total_rows = len(rows)
# Process in batches
for batch_num, batch_start in enumerate(range(0, total_rows, batch_size)):
batch_end = min(batch_start + batch_size, total_rows)
batch_ids = [row[0] for row in rows[batch_start:batch_end]]
if not batch_ids:
continue
# Use IN clause instead of BETWEEN for UUIDs
placeholders = ", ".join([f":id{i}" for i in range(len(batch_ids))])
params = {f"id{i}": id_val for i, id_val in enumerate(batch_ids)}
# Execute update for this batch
connection.execute(
text(
f"""
UPDATE chat_session
SET description_tsv = to_tsvector('english', COALESCE(description, ''))
WHERE id IN ({placeholders})
AND description_tsv IS NULL
"""
).bindparams(**params)
)
# Commit each batch
connection.execute(text("COMMIT"))
# Start a new transaction
connection.execute(text("BEGIN"))
batches += 1
# Final check for any remaining NULL values
connection.execute(
text(
"""
UPDATE chat_session SET description_tsv = to_tsvector('english', COALESCE(description, ''))
WHERE description_tsv IS NULL
"""
)
)
# Create GIN index concurrently
connection.execute(text("COMMIT"))
time.time()
connection.execute(
text(
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
ON chat_session USING GIN (description_tsv)
"""
)
)
# After Final check for chat_session
# First drop the trigger as it won't be needed anymore
connection.execute(
text(
"""
DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session;
"""
)
)
connection.execute(
text(
"""
DROP FUNCTION IF EXISTS update_chat_session_tsv();
"""
)
)
# Add new generated column
time.time()
connection.execute(
text(
"""
ALTER TABLE chat_session
ADD COLUMN description_tsv_gen tsvector
GENERATED ALWAYS AS (to_tsvector('english', COALESCE(description, ''))) STORED;
"""
)
)
# Create new index on generated column
connection.execute(text("COMMIT"))
time.time()
connection.execute(
text(
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv_gen
ON chat_session USING GIN (description_tsv_gen)
"""
)
)
# Drop old index and column
connection.execute(text("COMMIT"))
connection.execute(
text(
"""
DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;
"""
)
)
connection.execute(text("COMMIT"))
connection.execute(
text(
"""
ALTER TABLE chat_session DROP COLUMN description_tsv;
"""
)
)
# Rename new column to old name
connection.execute(
text(
"""
ALTER TABLE chat_session RENAME COLUMN description_tsv_gen TO description_tsv;
"""
)
op.execute(
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
ON chat_session
USING GIN (description_tsv)
"""
)
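The new migration body above replaces the old trigger-plus-batched-backfill strategy with STORED generated columns: Postgres computes the tsvector for existing and future rows itself, so the trigger function and the Python batch loop shown in the removed code become unnecessary. A minimal sketch of the pattern (illustrative, not the verbatim migration):

from alembic import op

def upgrade() -> None:
    # A STORED generated column stays in sync automatically; Postgres
    # backfills it for existing rows when the column is added.
    op.execute(
        """
        ALTER TABLE chat_message
        ADD COLUMN message_tsv tsvector
        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
        """
    )
    # CREATE INDEX CONCURRENTLY cannot run inside a transaction block,
    # so the migration commits first, then builds the GIN index without
    # holding a long write lock on the table.
    op.execute("COMMIT")
    op.execute(
        """
        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
        ON chat_message USING GIN (message_tsv)
        """
    )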


@@ -134,9 +134,7 @@ def fetch_chat_sessions_eagerly_by_time(
limit: int | None = 500,
initial_time: datetime | None = None,
) -> list[ChatSession]:
"""Sorted by oldest to newest, then by message id"""
asc_time_order: UnaryExpression = asc(ChatSession.time_created)
time_order: UnaryExpression = desc(ChatSession.time_created)
message_order: UnaryExpression = asc(ChatMessage.id)
filters: list[ColumnElement | BinaryExpression] = [
@@ -149,7 +147,8 @@ def fetch_chat_sessions_eagerly_by_time(
subquery = (
db_session.query(ChatSession.id, ChatSession.time_created)
.filter(*filters)
.order_by(asc_time_order)
.order_by(ChatSession.id, time_order)
.distinct(ChatSession.id)
.limit(limit)
.subquery()
)
@@ -165,7 +164,7 @@ def fetch_chat_sessions_eagerly_by_time(
ChatMessage.chat_message_feedbacks
),
)
.order_by(asc_time_order, message_order)
.order_by(time_order, message_order)
)
chat_sessions = query.all()
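A note on the reordered subquery: Postgres's SELECT DISTINCT ON (col) requires the ORDER BY list to start with that same column, which is why ChatSession.id now leads the ordering. An illustrative re-statement of the subquery under that constraint:

from sqlalchemy import desc

# DISTINCT ON (id) demands that ORDER BY lead with id; the descending
# time ordering then sorts rows within each id.
subquery = (
    db_session.query(ChatSession.id, ChatSession.time_created)
    .filter(*filters)
    .order_by(ChatSession.id, desc(ChatSession.time_created))
    .distinct(ChatSession.id)
    .limit(limit)
    .subquery()
)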


@@ -16,18 +16,13 @@ from onyx.db.models import UsageReport
from onyx.file_store.file_store import get_default_file_store
# Gets skeletons of all messages in the given range
# Gets skeletons of all messages
def get_empty_chat_messages_entries__paginated(
db_session: Session,
period: tuple[datetime, datetime],
limit: int | None = 500,
initial_time: datetime | None = None,
) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:
"""Returns a tuple where:
first element is the most recent timestamp out of the sessions iterated
- this timestamp can be used to paginate forward in time
second element is a list of messages belonging to all the sessions iterated
"""
chat_sessions = fetch_chat_sessions_eagerly_by_time(
start=period[0],
end=period[1],
@@ -57,17 +52,18 @@ def get_empty_chat_messages_entries__paginated(
if len(chat_sessions) == 0:
return None, []
return chat_sessions[-1].time_created, message_skeletons
return chat_sessions[0].time_created, message_skeletons
def get_all_empty_chat_message_entries(
db_session: Session,
period: tuple[datetime, datetime],
) -> Generator[list[ChatMessageSkeleton], None, None]:
"""period is the range of time over which to fetch messages."""
initial_time: Optional[datetime] = period[0]
ind = 0
while True:
# iterate from oldest to newest
ind += 1
time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
db_session,
period,
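The generator above is cut off by the diff context; a hedged sketch of how the loop plausibly completes, given that the paginated helper returns (None, []) once no sessions remain (assumed continuation, not the commit's verbatim code):

while True:
    ind += 1
    time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
        db_session,
        period,
        initial_time=initial_time,
    )
    if time_created is None:
        return  # (None, []) signals no sessions left in the window
    yield message_skeletons
    initial_time = time_created  # cursor for the next page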


@@ -104,14 +104,14 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
status_code=409, detail="User already belongs to an organization"
)
logger.debug(f"Provisioning tenant {tenant_id} for user {email}")
logger.info(f"Provisioning tenant: {tenant_id}")
token = None
try:
if not create_schema_if_not_exists(tenant_id):
logger.debug(f"Created schema for tenant {tenant_id}")
logger.info(f"Created schema for tenant {tenant_id}")
else:
logger.debug(f"Schema already exists for tenant {tenant_id}")
logger.info(f"Schema already exists for tenant {tenant_id}")
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)


@@ -6,7 +6,7 @@ MODEL_WARM_UP_STRING = "hi " * 512
DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"
DEFAULT_VERTEX_MODEL = "text-embedding-005"
DEFAULT_VERTEX_MODEL = "text-embedding-004"
class EmbeddingModelTextType:


@@ -5,7 +5,6 @@ from types import TracebackType
from typing import cast
from typing import Optional
import aioboto3 # type: ignore
import httpx
import openai
import vertexai # type: ignore
@@ -29,13 +28,11 @@ from model_server.constants import DEFAULT_VERTEX_MODEL
from model_server.constants import DEFAULT_VOYAGE_MODEL
from model_server.constants import EmbeddingModelTextType
from model_server.constants import EmbeddingProvider
from model_server.utils import pass_aws_key
from model_server.utils import simple_log_function_time
from onyx.utils.logger import setup_logger
from shared_configs.configs import API_BASED_EMBEDDING_TIMEOUT
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
from shared_configs.configs import VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
from shared_configs.enums import EmbedTextType
from shared_configs.enums import RerankerProvider
from shared_configs.model_server_models import Embedding
@@ -185,24 +182,17 @@ class CloudEmbedding:
vertexai.init(project=project_id, credentials=credentials)
client = TextEmbeddingModel.from_pretrained(model)
inputs = [TextEmbeddingInput(text, embedding_type) for text in texts]
# Split into batches of 25 texts
max_texts_per_batch = VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
batches = [
inputs[i : i + max_texts_per_batch]
for i in range(0, len(inputs), max_texts_per_batch)
]
# Dispatch all embedding calls asynchronously at once
tasks = [
client.get_embeddings_async(batch, auto_truncate=True) for batch in batches
]
# Wait for all tasks to complete in parallel
results = await asyncio.gather(*tasks)
return [embedding.values for batch in results for embedding in batch]
embeddings = await client.get_embeddings_async(
[
TextEmbeddingInput(
text,
embedding_type,
)
for text in texts
],
auto_truncate=True, # This is the default
)
return [embedding.values for embedding in embeddings]
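For contrast, the removed code above split the inputs into VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE-sized batches and dispatched them all concurrently; distilled into a self-contained sketch (client is assumed to be the Vertex TextEmbeddingModel from the surrounding code):

import asyncio

async def embed_in_batches(client, inputs, batch_size=25):
    # Split into fixed-size batches, dispatch every embedding call at
    # once, then flatten the per-batch results into one list of vectors.
    batches = [inputs[i : i + batch_size] for i in range(0, len(inputs), batch_size)]
    results = await asyncio.gather(
        *(client.get_embeddings_async(batch, auto_truncate=True) for batch in batches)
    )
    return [embedding.values for batch in results for embedding in batch]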
async def _embed_litellm_proxy(
self, texts: list[str], model_name: str | None
@@ -457,7 +447,7 @@ async def local_rerank(query: str, docs: list[str], model_name: str) -> list[flo
)
async def cohere_rerank_api(
async def cohere_rerank(
query: str, docs: list[str], model_name: str, api_key: str
) -> list[float]:
cohere_client = CohereAsyncClient(api_key=api_key)
@@ -467,45 +457,6 @@ async def cohere_rerank_api(
return [result.relevance_score for result in sorted_results]
async def cohere_rerank_aws(
query: str,
docs: list[str],
model_name: str,
region_name: str,
aws_access_key_id: str,
aws_secret_access_key: str,
) -> list[float]:
session = aioboto3.Session(
aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key
)
async with session.client(
"bedrock-runtime", region_name=region_name
) as bedrock_client:
body = json.dumps(
{
"query": query,
"documents": docs,
"api_version": 2,
}
)
# Invoke the Bedrock model asynchronously
response = await bedrock_client.invoke_model(
modelId=model_name,
accept="application/json",
contentType="application/json",
body=body,
)
# Read the response asynchronously
response_body = json.loads(await response["body"].read())
# Extract and sort the results
results = response_body.get("results", [])
sorted_results = sorted(results, key=lambda item: item["index"])
return [result["relevance_score"] for result in sorted_results]
async def litellm_rerank(
query: str, docs: list[str], api_url: str, model_name: str, api_key: str | None
) -> list[float]:
@@ -621,32 +572,15 @@ async def process_rerank_request(rerank_request: RerankRequest) -> RerankRespons
elif rerank_request.provider_type == RerankerProvider.COHERE:
if rerank_request.api_key is None:
raise RuntimeError("Cohere Rerank Requires an API Key")
sim_scores = await cohere_rerank_api(
sim_scores = await cohere_rerank(
query=rerank_request.query,
docs=rerank_request.documents,
model_name=rerank_request.model_name,
api_key=rerank_request.api_key,
)
return RerankResponse(scores=sim_scores)
elif rerank_request.provider_type == RerankerProvider.BEDROCK:
if rerank_request.api_key is None:
raise RuntimeError("Bedrock Rerank Requires an API Key")
aws_access_key_id, aws_secret_access_key, aws_region = pass_aws_key(
rerank_request.api_key
)
sim_scores = await cohere_rerank_aws(
query=rerank_request.query,
docs=rerank_request.documents,
model_name=rerank_request.model_name,
region_name=aws_region,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
)
return RerankResponse(scores=sim_scores)
else:
raise ValueError(f"Unsupported provider: {rerank_request.provider_type}")
except Exception as e:
logger.exception(f"Error during reranking process:\n{str(e)}")
raise HTTPException(


@@ -70,32 +70,3 @@ def get_gpu_type() -> str:
return GPUStatus.MAC_MPS
return GPUStatus.NONE
def pass_aws_key(api_key: str) -> tuple[str, str, str]:
"""Parse AWS API key string into components.
Args:
api_key: String in format 'aws_ACCESSKEY_SECRETKEY_REGION'
Returns:
Tuple of (access_key, secret_key, region)
Raises:
ValueError: If key format is invalid
"""
if not api_key.startswith("aws"):
raise ValueError("API key must start with 'aws' prefix")
parts = api_key.split("_")
if len(parts) != 4:
raise ValueError(
f"API key must be in format 'aws_ACCESSKEY_SECRETKEY_REGION', got {len(parts) - 1} parts"
"this is an onyx specific format for formatting the aws secrets for bedrock"
)
try:
_, aws_access_key_id, aws_secret_access_key, aws_region = parts
return aws_access_key_id, aws_secret_access_key, aws_region
except Exception as e:
raise ValueError(f"Failed to parse AWS key components: {str(e)}")


@@ -98,16 +98,8 @@ def choose_tool(
# For tool calling LLMs, we want to insert the task prompt as part of this flow, this is because the LLM
# may choose to not call any tools and just generate the answer, in which case the task prompt is needed.
prompt=built_prompt,
tools=(
[tool.tool_definition() for tool in tools] or None
if using_tool_calling_llm
else None
),
tool_choice=(
"required"
if tools and force_use_tool.force_use and using_tool_calling_llm
else None
),
tools=[tool.tool_definition() for tool in tools] or None,
tool_choice=("required" if tools and force_use_tool.force_use else None),
structured_response_format=structured_response_format,
)
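One subtlety in the tools argument: the `or None` idiom normalizes an empty tool list to None, so a provider never receives an empty tools array. For instance:

tools = []  # no tools configured
tool_definitions = [tool.tool_definition() for tool in tools] or None
assert tool_definitions is None  # empty list collapses to None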


@@ -523,13 +523,12 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
user_count = await get_user_count()
logger.debug(f"Current tenant user count: {user_count}")
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
if user_count == 1:
create_milestone_and_report(
user=user,
distinct_id=user.email,
distinct_id=f"{user.email}_{tenant_id}",
event_type=MilestoneRecordType.USER_SIGNED_UP,
properties=None,
db_session=db_session,
@@ -537,7 +536,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
else:
create_milestone_and_report(
user=user,
distinct_id=user.email,
distinct_id=f"{user.email}_{tenant_id}",
event_type=MilestoneRecordType.MULTIPLE_USERS,
properties=None,
db_session=db_session,
@@ -545,7 +544,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
logger.debug(f"User {user.id} has registered.")
logger.notice(f"User {user.id} has registered.")
optional_telemetry(
record_type=RecordType.SIGN_UP,
data={"action": "create"},


@@ -756,7 +756,6 @@ def stream_chat_message_objects(
)
# LLM prompt building, response capturing, etc.
answer = Answer(
prompt_builder=prompt_builder,
is_connected=is_connected,


@@ -640,6 +640,3 @@ TEST_ENV = os.environ.get("TEST_ENV", "").lower() == "true"
MOCK_LLM_RESPONSE = (
os.environ.get("MOCK_LLM_RESPONSE") if os.environ.get("MOCK_LLM_RESPONSE") else None
)
DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB = 20


@@ -1,38 +0,0 @@
from onyx.configs.app_configs import DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB
from onyx.server.settings.store import load_settings
def get_image_extraction_and_analysis_enabled() -> bool:
"""Get image extraction and analysis enabled setting from workspace settings or fallback to False"""
try:
settings = load_settings()
if settings.image_extraction_and_analysis_enabled is not None:
return settings.image_extraction_and_analysis_enabled
except Exception:
pass
return False
def get_search_time_image_analysis_enabled() -> bool:
"""Get search time image analysis enabled setting from workspace settings or fallback to False"""
try:
settings = load_settings()
if settings.search_time_image_analysis_enabled is not None:
return settings.search_time_image_analysis_enabled
except Exception:
pass
return False
def get_image_analysis_max_size_mb() -> int:
"""Get image analysis max size MB setting from workspace settings or fallback to environment variable"""
try:
settings = load_settings()
if settings.image_analysis_max_size_mb is not None:
return settings.image_analysis_max_size_mb
except Exception:
pass
return DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB
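Each deleted helper follows the same fallback shape: prefer the workspace setting when it is set, otherwise fall back to a default. A hypothetical call site (image_data is an assumed variable holding raw bytes):

max_mb = get_image_analysis_max_size_mb()
should_analyze = (
    get_image_extraction_and_analysis_enabled()
    and len(image_data) <= max_mb * 1024 * 1024
)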


@@ -200,6 +200,7 @@ class AirtableConnector(LoadConnector):
return attachment_response.content
logger.error(f"Failed to refresh attachment for {filename}")
raise
attachment_content = get_attachment_with_retry(url, record_id)


@@ -11,12 +11,13 @@ from onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import extract_text_from_confluence_html
from onyx.connectors.confluence.onyx_confluence import attachment_to_content
from onyx.connectors.confluence.onyx_confluence import (
extract_text_from_confluence_html,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.confluence.utils import build_confluence_document_id
from onyx.connectors.confluence.utils import convert_attachment_to_content
from onyx.connectors.confluence.utils import datetime_from_string
from onyx.connectors.confluence.utils import process_attachment
from onyx.connectors.confluence.utils import validate_attachment_filetype
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
@@ -35,26 +36,28 @@ from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.connectors.models import SlimDocument
from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Potential Improvements
# 1. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost
# 1. Include attachments, etc
# 2. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost
_COMMENT_EXPANSION_FIELDS = ["body.storage.value"]
_PAGE_EXPANSION_FIELDS = [
"body.storage.value",
"version",
"space",
"metadata.labels",
"history.lastUpdated",
]
_ATTACHMENT_EXPANSION_FIELDS = [
"version",
"space",
"metadata.labels",
]
_RESTRICTIONS_EXPANSION_FIELDS = [
"space",
"restrictions.read.restrictions.user",
@@ -84,11 +87,7 @@ _FULL_EXTENSION_FILTER_STRING = "".join(
class ConfluenceConnector(
LoadConnector,
PollConnector,
SlimConnector,
CredentialsConnector,
VisionEnabledConnector,
LoadConnector, PollConnector, SlimConnector, CredentialsConnector
):
def __init__(
self,
@@ -106,24 +105,13 @@ class ConfluenceConnector(
labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,
) -> None:
self.wiki_base = wiki_base
self.is_cloud = is_cloud
self.space = space
self.page_id = page_id
self.index_recursively = index_recursively
self.cql_query = cql_query
self.batch_size = batch_size
self.continue_on_failure = continue_on_failure
self.labels_to_skip = labels_to_skip
self.timezone_offset = timezone_offset
self._confluence_client: OnyxConfluence | None = None
self._fetched_titles: set[str] = set()
# Initialize vision LLM using the mixin
self.initialize_vision_llm()
self.is_cloud = is_cloud
# Remove trailing slash from wiki_base if present
self.wiki_base = wiki_base.rstrip("/")
"""
If nothing is provided, we default to fetching all pages
Only one or none of the following options should be specified so
@@ -165,6 +153,8 @@ class ConfluenceConnector(
"max_backoff_seconds": 60,
}
self._confluence_client: OnyxConfluence | None = None
@property
def confluence_client(self) -> OnyxConfluence:
if self._confluence_client is None:
@@ -194,6 +184,7 @@ class ConfluenceConnector(
end: SecondsSinceUnixEpoch | None = None,
) -> str:
page_query = self.base_cql_page_query + self.cql_label_filter
# Add time filters
if start:
formatted_start_time = datetime.fromtimestamp(
@@ -205,6 +196,7 @@ class ConfluenceConnector(
"%Y-%m-%d %H:%M"
)
page_query += f" and lastmodified <= '{formatted_end_time}'"
return page_query
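The time bounds are appended as CQL `lastmodified` filters at minute precision; a sketch of the formatting step, with the timezone handling assumed to mirror the connector's timezone_offset:

from datetime import datetime, timedelta, timezone

timezone_offset = 0.0  # assumed; the connector reads CONFLUENCE_TIMEZONE_OFFSET
tz = timezone(timedelta(hours=timezone_offset))
start = 1735689600.0  # example epoch seconds
formatted_start_time = datetime.fromtimestamp(start, tz=tz).strftime("%Y-%m-%d %H:%M")
page_query = "type=page" + f" and lastmodified >= '{formatted_start_time}'"
# -> "type=page and lastmodified >= '2025-01-01 00:00'"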
def _construct_attachment_query(self, confluence_page_id: str) -> str:
@@ -215,10 +207,11 @@ class ConfluenceConnector(
def _get_comment_string_for_page_id(self, page_id: str) -> str:
comment_string = ""
comment_cql = f"type=comment and container='{page_id}'"
comment_cql += self.cql_label_filter
expand = ",".join(_COMMENT_EXPANSION_FIELDS)
expand = ",".join(_COMMENT_EXPANSION_FIELDS)
for comment in self.confluence_client.paginated_cql_retrieval(
cql=comment_cql,
expand=expand,
@@ -229,177 +222,123 @@ class ConfluenceConnector(
confluence_object=comment,
fetched_titles=set(),
)
return comment_string
def _convert_page_to_document(self, page: dict[str, Any]) -> Document | None:
def _convert_object_to_document(
self,
confluence_object: dict[str, Any],
parent_content_id: str | None = None,
) -> Document | None:
"""
Converts a Confluence page to a Document object.
Includes the page content, comments, and attachments.
"""
try:
# Extract basic page information
page_id = page["id"]
page_title = page["title"]
page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
Takes in a confluence object, extracts all metadata, and converts it into a document.
If it's a page, it extracts the text and adds the comments to the document text.
If it's an attachment, it just downloads the attachment and converts that into a document.
# Get the page content
page_content = extract_text_from_confluence_html(
self.confluence_client, page, self._fetched_titles
parent_content_id: if the object is an attachment, specifies the content id that
the attachment is attached to
"""
# The url and the id are the same
object_url = build_confluence_document_id(
self.wiki_base, confluence_object["_links"]["webui"], self.is_cloud
)
object_text = None
# Extract text from page
if confluence_object["type"] == "page":
object_text = extract_text_from_confluence_html(
confluence_client=self.confluence_client,
confluence_object=confluence_object,
fetched_titles={confluence_object.get("title", "")},
)
# Add comments to text
object_text += self._get_comment_string_for_page_id(confluence_object["id"])
elif confluence_object["type"] == "attachment":
object_text = attachment_to_content(
confluence_client=self.confluence_client,
attachment=confluence_object,
parent_content_id=parent_content_id,
)
# Create the main section for the page content
sections = [Section(text=page_content, link=page_url)]
# Process comments if available
comment_text = self._get_comment_string_for_page_id(page_id)
if comment_text:
sections.append(Section(text=comment_text, link=f"{page_url}#comments"))
# Process attachments
if "children" in page and "attachment" in page["children"]:
attachments = self.confluence_client.get_attachments_for_page(
page_id, expand="metadata"
)
for attachment in attachments.get("results", []):
# Process each attachment
result = process_attachment(
self.confluence_client,
attachment,
page_title,
self.image_analysis_llm,
)
if result.text:
# Create a section for the attachment text
attachment_section = Section(
text=result.text,
link=f"{page_url}#attachment-{attachment['id']}",
image_file_name=result.file_name,
)
sections.append(attachment_section)
elif result.error:
logger.warning(
f"Error processing attachment '{attachment.get('title')}': {result.error}"
)
# Extract metadata
metadata = {}
if "space" in page:
metadata["space"] = page["space"].get("name", "")
# Extract labels
labels = []
if "metadata" in page and "labels" in page["metadata"]:
for label in page["metadata"]["labels"].get("results", []):
labels.append(label.get("name", ""))
if labels:
metadata["labels"] = labels
# Extract owners
primary_owners = []
if "version" in page and "by" in page["version"]:
author = page["version"]["by"]
display_name = author.get("displayName", "Unknown")
primary_owners.append(BasicExpertInfo(display_name=display_name))
# Create the document
return Document(
id=build_confluence_document_id(self.wiki_base, page_id, self.is_cloud),
sections=sections,
source=DocumentSource.CONFLUENCE,
semantic_identifier=page_title,
metadata=metadata,
doc_updated_at=datetime_from_string(page["version"]["when"]),
primary_owners=primary_owners if primary_owners else None,
)
except Exception as e:
logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
if not self.continue_on_failure:
raise
if object_text is None:
# This only happens for attachments that are not parseable
return None
# Get space name
doc_metadata: dict[str, str | list[str]] = {
"Wiki Space Name": confluence_object["space"]["name"]
}
# Get labels
label_dicts = (
confluence_object.get("metadata", {}).get("labels", {}).get("results", [])
)
page_labels = [label.get("name") for label in label_dicts if label.get("name")]
if page_labels:
doc_metadata["labels"] = page_labels
# Get last modified and author email
version_dict = confluence_object.get("version", {})
last_modified = (
datetime_from_string(version_dict.get("when"))
if version_dict.get("when")
else None
)
author_email = version_dict.get("by", {}).get("email")
title = confluence_object.get("title", "Untitled Document")
return Document(
id=object_url,
sections=[Section(link=object_url, text=object_text)],
source=DocumentSource.CONFLUENCE,
semantic_identifier=title,
doc_updated_at=last_modified,
primary_owners=(
[BasicExpertInfo(email=author_email)] if author_email else None
),
metadata=doc_metadata,
)
def _fetch_document_batches(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> GenerateDocumentsOutput:
"""
Yields batches of Documents. For each page:
- Create a Document with 1 Section for the page text/comments
- Then fetch attachments. For each attachment:
- Attempt to convert it with convert_attachment_to_content(...)
- If successful, create a new Section with the extracted text or summary.
"""
doc_batch: list[Document] = []
confluence_page_ids: list[str] = []
page_query = self._construct_page_query(start, end)
logger.debug(f"page_query: {page_query}")
# Fetch pages as Documents
for page in self.confluence_client.paginated_cql_retrieval(
cql=page_query,
expand=",".join(_PAGE_EXPANSION_FIELDS),
limit=self.batch_size,
):
# Build doc from page
doc = self._convert_page_to_document(page)
if not doc:
continue
# Now get attachments for that page:
attachment_query = self._construct_attachment_query(page["id"])
# We'll use the page's XML to provide context if we summarize an image
confluence_xml = page.get("body", {}).get("storage", {}).get("value", "")
logger.debug(f"_fetch_document_batches: {page['id']}")
confluence_page_ids.append(page["id"])
doc = self._convert_object_to_document(page)
if doc is not None:
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
# Fetch attachments as Documents
for confluence_page_id in confluence_page_ids:
attachment_query = self._construct_attachment_query(confluence_page_id)
# TODO: maybe should add time filter as well?
for attachment in self.confluence_client.paginated_cql_retrieval(
cql=attachment_query,
expand=",".join(_ATTACHMENT_EXPANSION_FIELDS),
):
attachment["metadata"].get("mediaType", "")
if not validate_attachment_filetype(
attachment, self.image_analysis_llm
):
continue
# Attempt to get textual content or image summarization:
try:
logger.info(f"Processing attachment: {attachment['title']}")
response = convert_attachment_to_content(
confluence_client=self.confluence_client,
attachment=attachment,
page_context=confluence_xml,
llm=self.image_analysis_llm,
)
if response is None:
continue
content_text, file_storage_name = response
object_url = build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
)
if content_text:
doc.sections.append(
Section(
text=content_text,
link=object_url,
image_file_name=file_storage_name,
)
)
except Exception as e:
logger.error(
f"Failed to extract/summarize attachment {attachment['title']}",
exc_info=e,
)
if not self.continue_on_failure:
raise
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
doc = self._convert_object_to_document(attachment, confluence_page_id)
if doc is not None:
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
if doc_batch:
yield doc_batch
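The accumulate/yield/flush idiom used for both the page and attachment loops, distilled (produce_documents is a hypothetical stand-in for either retrieval loop):

def batched(produce_documents, batch_size):
    doc_batch = []
    for doc in produce_documents():
        doc_batch.append(doc)
        if len(doc_batch) >= batch_size:
            yield doc_batch  # emit a full batch and start a new one
            doc_batch = []
    if doc_batch:
        yield doc_batch  # flush whatever remains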
@@ -420,63 +359,55 @@ class ConfluenceConnector(
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
"""
Return 'slim' docs (IDs + minimal permission data).
Does not fetch actual text. Used primarily for incremental permission sync.
"""
doc_metadata_list: list[SlimDocument] = []
restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)
# Query pages
page_query = self.base_cql_page_query + self.cql_label_filter
for page in self.confluence_client.cql_paginate_all_expansions(
cql=page_query,
expand=restrictions_expand,
limit=_SLIM_DOC_BATCH_SIZE,
):
# If the page has restrictions, add them to the perm_sync_data
# These will be used by doc_sync.py to sync permissions
page_restrictions = page.get("restrictions")
page_space_key = page.get("space", {}).get("key")
page_ancestors = page.get("ancestors", [])
page_perm_sync_data = {
"restrictions": page_restrictions or {},
"space_key": page_space_key,
"ancestors": page_ancestors,
"ancestors": page_ancestors or [],
}
doc_metadata_list.append(
SlimDocument(
id=build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
self.wiki_base,
page["_links"]["webui"],
self.is_cloud,
),
perm_sync_data=page_perm_sync_data,
)
)
# Query attachments for each page
attachment_query = self._construct_attachment_query(page["id"])
for attachment in self.confluence_client.cql_paginate_all_expansions(
cql=attachment_query,
expand=restrictions_expand,
limit=_SLIM_DOC_BATCH_SIZE,
):
# If you skip images, you'll skip them in the permission sync
attachment["metadata"].get("mediaType", "")
if not validate_attachment_filetype(
attachment, self.image_analysis_llm
):
if not validate_attachment_filetype(attachment):
continue
attachment_restrictions = attachment.get("restrictions", {})
attachment_restrictions = attachment.get("restrictions")
if not attachment_restrictions:
attachment_restrictions = page_restrictions or {}
attachment_restrictions = page_restrictions
attachment_space_key = attachment.get("space", {}).get("key")
if not attachment_space_key:
attachment_space_key = page_space_key
attachment_perm_sync_data = {
"restrictions": attachment_restrictions,
"restrictions": attachment_restrictions or {},
"space_key": attachment_space_key,
}
@@ -490,16 +421,16 @@ class ConfluenceConnector(
perm_sync_data=attachment_perm_sync_data,
)
)
if len(doc_metadata_list) > _SLIM_DOC_BATCH_SIZE:
yield doc_metadata_list[:_SLIM_DOC_BATCH_SIZE]
doc_metadata_list = doc_metadata_list[_SLIM_DOC_BATCH_SIZE:]
if callback and callback.should_stop():
raise RuntimeError(
"retrieve_all_slim_documents: Stop signal detected"
)
if callback:
if callback.should_stop():
raise RuntimeError(
"retrieve_all_slim_documents: Stop signal detected"
)
callback.progress("retrieve_all_slim_documents", 1)
yield doc_metadata_list


@@ -144,12 +144,6 @@ class OnyxConfluence:
self.static_credentials = credential_json
return credential_json, False
if not OAUTH_CONFLUENCE_CLOUD_CLIENT_ID:
raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_ID must be set!")
if not OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET:
raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET must be set!")
# check if we should refresh tokens. we're deciding to refresh halfway
# to expiration
now = datetime.now(timezone.utc)


@@ -1,12 +1,9 @@
import io
import math
import time
from collections.abc import Callable
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from io import BytesIO
from pathlib import Path
from typing import Any
from typing import cast
from typing import TYPE_CHECKING
@@ -15,28 +12,14 @@ from urllib.parse import parse_qs
from urllib.parse import quote
from urllib.parse import urlparse
import bs4
import requests
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.configs.app_configs import (
CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD,
)
from onyx.configs.constants import FileOrigin
from onyx.utils.logger import setup_logger
if TYPE_CHECKING:
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.models import PGFileStore
from onyx.db.pg_file_store import create_populate_lobj
from onyx.db.pg_file_store import save_bytes_to_pgfilestore
from onyx.db.pg_file_store import upsert_pgfilestore
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.file_validation import is_valid_image_type
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
pass
logger = setup_logger()
@@ -52,229 +35,15 @@ class TokenResponse(BaseModel):
scope: str
def validate_attachment_filetype(
attachment: dict[str, Any], llm: LLM | None = None
) -> bool:
"""
Validates if the attachment is a supported file type.
If LLM is provided, also checks if it's an image that can be processed.
"""
attachment.get("metadata", {})
media_type = attachment.get("metadata", {}).get("mediaType", "")
if media_type.startswith("image/"):
return llm is not None and is_valid_image_type(media_type)
# For non-image files, check if we support the extension
title = attachment.get("title", "")
extension = Path(title).suffix.lstrip(".").lower() if "." in title else ""
return extension in ["pdf", "doc", "docx", "txt", "md", "rtf"]
class AttachmentProcessingResult(BaseModel):
"""
A container for results after processing a Confluence attachment.
'text' is the textual content of the attachment.
'file_name' is the final file name used in PGFileStore to store the content.
'error' holds an exception or string if something failed.
"""
text: str | None
file_name: str | None
error: str | None = None
def _download_attachment(
confluence_client: "OnyxConfluence", attachment: dict[str, Any]
) -> bytes | None:
"""
Retrieves the raw bytes of an attachment from Confluence. Returns None on error.
"""
download_link = confluence_client.url + attachment["_links"]["download"]
resp = confluence_client._session.get(download_link)
if resp.status_code != 200:
logger.warning(
f"Failed to fetch {download_link} with status code {resp.status_code}"
)
return None
return resp.content
def process_attachment(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
page_context: str,
llm: LLM | None,
) -> AttachmentProcessingResult:
"""
Processes a Confluence attachment. If it's a document, extracts text,
or if it's an image and an LLM is available, summarizes it. Returns a structured result.
"""
try:
# Get the media type from the attachment metadata
media_type = attachment.get("metadata", {}).get("mediaType", "")
# Validate the attachment type
if not validate_attachment_filetype(attachment, llm):
return AttachmentProcessingResult(
text=None,
file_name=None,
error=f"Unsupported file type: {media_type}",
)
# Download the attachment
raw_bytes = _download_attachment(confluence_client, attachment)
if raw_bytes is None:
return AttachmentProcessingResult(
text=None, file_name=None, error="Failed to download attachment"
)
# Process image attachments with LLM if available
if media_type.startswith("image/") and llm:
return _process_image_attachment(
confluence_client, attachment, page_context, llm, raw_bytes, media_type
)
# Process document attachments
try:
text = extract_file_text(
file=BytesIO(raw_bytes),
file_name=attachment["title"],
)
# Skip if the text is too long
if len(text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
return AttachmentProcessingResult(
text=None,
file_name=None,
error=f"Attachment text too long: {len(text)} chars",
)
return AttachmentProcessingResult(text=text, file_name=None, error=None)
except Exception as e:
return AttachmentProcessingResult(
text=None, file_name=None, error=f"Failed to extract text: {e}"
)
except Exception as e:
return AttachmentProcessingResult(
text=None, file_name=None, error=f"Failed to process attachment: {e}"
)
def _process_image_attachment(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
page_context: str,
llm: LLM,
raw_bytes: bytes,
media_type: str,
) -> AttachmentProcessingResult:
"""Process an image attachment by saving it and generating a summary."""
try:
# Use the standardized image storage and section creation
with get_session_with_current_tenant() as db_session:
section, file_name = store_image_and_create_section(
db_session=db_session,
image_data=raw_bytes,
file_name=Path(attachment["id"]).name,
display_name=attachment["title"],
media_type=media_type,
llm=llm,
file_origin=FileOrigin.CONNECTOR,
)
return AttachmentProcessingResult(
text=section.text, file_name=file_name, error=None
)
except Exception as e:
msg = f"Image summarization failed for {attachment['title']}: {e}"
logger.error(msg, exc_info=e)
return AttachmentProcessingResult(text=None, file_name=None, error=msg)
def _process_text_attachment(
attachment: dict[str, Any],
raw_bytes: bytes,
media_type: str,
) -> AttachmentProcessingResult:
"""Process a text-based attachment by extracting its content."""
try:
extracted_text = extract_file_text(
io.BytesIO(raw_bytes),
file_name=attachment["title"],
break_on_unprocessable=False,
)
except Exception as e:
msg = f"Failed to extract text for '{attachment['title']}': {e}"
logger.error(msg, exc_info=e)
return AttachmentProcessingResult(text=None, file_name=None, error=msg)
# Check length constraints
if extracted_text is None or len(extracted_text) == 0:
msg = f"No text extracted for {attachment['title']}"
logger.warning(msg)
return AttachmentProcessingResult(text=None, file_name=None, error=msg)
if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
msg = (
f"Skipping attachment {attachment['title']} due to char count "
f"({len(extracted_text)} > {CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD})"
)
logger.warning(msg)
return AttachmentProcessingResult(text=None, file_name=None, error=msg)
# Save the attachment
try:
with get_session_with_current_tenant() as db_session:
saved_record = save_bytes_to_pgfilestore(
db_session=db_session,
raw_bytes=raw_bytes,
media_type=media_type,
identifier=attachment["id"],
display_name=attachment["title"],
)
except Exception as e:
msg = f"Failed to save attachment '{attachment['title']}' to PG: {e}"
logger.error(msg, exc_info=e)
return AttachmentProcessingResult(
text=extracted_text, file_name=None, error=msg
)
return AttachmentProcessingResult(
text=extracted_text, file_name=saved_record.file_name, error=None
)
def convert_attachment_to_content(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
page_context: str,
llm: LLM | None,
) -> tuple[str | None, str | None] | None:
"""
Facade function which:
1. Validates attachment type
2. Extracts or summarizes content
3. Returns (content_text, stored_file_name) or None if we should skip it
"""
media_type = attachment["metadata"]["mediaType"]
# Quick check for unsupported types:
if media_type.startswith("video/") or media_type == "application/gliffy+json":
logger.warning(
f"Skipping unsupported attachment type: '{media_type}' for {attachment['title']}"
)
return None
result = process_attachment(confluence_client, attachment, page_context, llm)
if result.error is not None:
logger.warning(
f"Attachment {attachment['title']} encountered error: {result.error}"
)
return None
# Return the text and the file name
return result.text, result.file_name
def validate_attachment_filetype(attachment: dict[str, Any]) -> bool:
return attachment["metadata"]["mediaType"] not in [
"image/jpeg",
"image/png",
"image/gif",
"image/svg+xml",
"video/mp4",
"video/quicktime",
]
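Note that the simplified validator is now an exclusion list: common image and video media types return False (they are skipped along with the removed vision pipeline), and any other attachment is considered processable. For example:

assert validate_attachment_filetype({"metadata": {"mediaType": "application/pdf"}})
assert not validate_attachment_filetype({"metadata": {"mediaType": "image/png"}})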
def build_confluence_document_id(
@@ -295,6 +64,23 @@ def build_confluence_document_id(
return f"{base_url}{content_url}"
def _extract_referenced_attachment_names(page_text: str) -> list[str]:
"""Parse a Confluence html page to generate a list of current
attachments in use
Args:
text (str): The page content
Returns:
list[str]: List of filenames currently in use by the page text
"""
referenced_attachment_filenames = []
soup = bs4.BeautifulSoup(page_text, "html.parser")
for attachment in soup.findAll("ri:attachment"):
referenced_attachment_filenames.append(attachment.attrs["ri:filename"])
return referenced_attachment_filenames
def datetime_from_string(datetime_string: str) -> datetime:
datetime_object = datetime.fromisoformat(datetime_string)
@@ -466,37 +252,3 @@ def update_param_in_path(path: str, param: str, value: str) -> str:
+ "?"
+ "&".join(f"{k}={quote(v[0])}" for k, v in query_params.items())
)
def attachment_to_file_record(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
db_session: Session,
) -> tuple[PGFileStore, bytes]:
"""Save an attachment to the file store and return the file record."""
download_link = _attachment_to_download_link(confluence_client, attachment)
image_data = confluence_client.get(
download_link, absolute=True, not_json_response=True
)
# Save image to file store
file_name = f"confluence_attachment_{attachment['id']}"
lobj_oid = create_populate_lobj(BytesIO(image_data), db_session)
pgfilestore = upsert_pgfilestore(
file_name=file_name,
display_name=attachment["title"],
file_origin=FileOrigin.OTHER,
file_type=attachment["metadata"]["mediaType"],
lobj_oid=lobj_oid,
db_session=db_session,
commit=True,
)
return pgfilestore, image_data
def _attachment_to_download_link(
confluence_client: "OnyxConfluence", attachment: dict[str, Any]
) -> str:
"""Extracts the download link to images."""
return confluence_client.url + attachment["_links"]["download"]


@@ -10,23 +10,22 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.pg_file_store import get_pgfilestore_by_file_name
from onyx.file_processing.extract_file_text import extract_text_and_images
from onyx.file_processing.extract_file_text import detect_encoding
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.extract_file_text import is_text_file_extension
from onyx.file_processing.extract_file_text import is_valid_file_ext
from onyx.file_processing.extract_file_text import load_files_from_zip
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.file_processing.extract_file_text import read_pdf_file
from onyx.file_processing.extract_file_text import read_text_file
from onyx.file_store.file_store import get_default_file_store
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -36,115 +35,81 @@ def _read_files_and_metadata(
file_name: str,
db_session: Session,
) -> Iterator[tuple[str, IO, dict[str, Any]]]:
"""
Reads the file from Postgres. If the file is a .zip, yields subfiles.
"""
"""Reads the file into IO, in the case of a zip file, yields each individual
file contained within, also includes the metadata dict if packaged in the zip"""
extension = get_file_ext(file_name)
metadata: dict[str, Any] = {}
directory_path = os.path.dirname(file_name)
# Read file from Postgres store
file_content = get_default_file_store(db_session).read_file(file_name, mode="b")
# If it's a zip, expand it
if extension == ".zip":
for file_info, subfile, metadata in load_files_from_zip(
for file_info, file, metadata in load_files_from_zip(
file_content, ignore_dirs=True
):
yield os.path.join(directory_path, file_info.filename), subfile, metadata
yield os.path.join(directory_path, file_info.filename), file, metadata
elif is_valid_file_ext(extension):
yield file_name, file_content, metadata
else:
logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
def _create_image_section(
llm: LLM | None,
image_data: bytes,
db_session: Session,
parent_file_name: str,
display_name: str,
idx: int = 0,
) -> tuple[Section, str | None]:
"""
Create a Section object for a single image and store the image in PGFileStore.
If summarization is enabled and we have an LLM, summarize the image.
Returns:
tuple: (Section object, file_name in PGFileStore or None if storage failed)
"""
# Create a unique file name for the embedded image
file_name = f"{parent_file_name}_embedded_{idx}"
# Use the standardized utility to store the image and create a section
return store_image_and_create_section(
db_session=db_session,
image_data=image_data,
file_name=file_name,
display_name=display_name,
llm=llm,
file_origin=FileOrigin.OTHER,
)
def _process_file(
file_name: str,
file: IO[Any],
metadata: dict[str, Any] | None,
pdf_pass: str | None,
db_session: Session,
llm: LLM | None,
metadata: dict[str, Any] | None = None,
pdf_pass: str | None = None,
) -> list[Document]:
"""
Processes a single file, returning a list of Documents (typically one).
Also handles embedded images if 'EMBEDDED_IMAGE_EXTRACTION_ENABLED' is true.
"""
extension = get_file_ext(file_name)
# Fetch the DB record so we know the ID for internal URL
pg_record = get_pgfilestore_by_file_name(file_name=file_name, db_session=db_session)
if not pg_record:
logger.warning(f"No file record found for '{file_name}' in PG; skipping.")
return []
if not is_valid_file_ext(extension):
logger.warning(
f"Skipping file '{file_name}' with unrecognized extension '{extension}'"
)
logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
return []
# Prepare doc metadata
if metadata is None:
metadata = {}
file_display_name = metadata.get("file_display_name") or os.path.basename(file_name)
file_metadata: dict[str, Any] = {}
# Timestamps
current_datetime = datetime.now(timezone.utc)
time_updated = metadata.get("time_updated", current_datetime)
if is_text_file_extension(file_name):
encoding = detect_encoding(file)
file_content_raw, file_metadata = read_text_file(
file, encoding=encoding, ignore_onyx_metadata=False
)
# Using the PDF reader function directly to pass in password cleanly
elif extension == ".pdf" and pdf_pass is not None:
file_content_raw, file_metadata = read_pdf_file(file=file, pdf_pass=pdf_pass)
else:
file_content_raw = extract_file_text(
file=file,
file_name=file_name,
break_on_unprocessable=True,
)
all_metadata = {**metadata, **file_metadata} if metadata else file_metadata
# add a prefix to avoid conflicts with other connectors
doc_id = f"FILE_CONNECTOR__{file_name}"
if metadata:
doc_id = metadata.get("document_id") or doc_id
# If this is set, we will show this in the UI as the "name" of the file
file_display_name = all_metadata.get("file_display_name") or os.path.basename(
file_name
)
title = (
all_metadata["title"] or "" if "title" in all_metadata else file_display_name
)
time_updated = all_metadata.get("time_updated", datetime.now(timezone.utc))
if isinstance(time_updated, str):
time_updated = time_str_to_utc(time_updated)
dt_str = metadata.get("doc_updated_at")
dt_str = all_metadata.get("doc_updated_at")
final_time_updated = time_str_to_utc(dt_str) if dt_str else time_updated
# Collect owners
p_owner_names = metadata.get("primary_owners")
s_owner_names = metadata.get("secondary_owners")
p_owners = (
[BasicExpertInfo(display_name=name) for name in p_owner_names]
if p_owner_names
else None
)
s_owners = (
[BasicExpertInfo(display_name=name) for name in s_owner_names]
if s_owner_names
else None
)
# Additional tags we store as doc metadata
# Metadata tags separate from the Onyx specific fields
metadata_tags = {
k: v
for k, v in metadata.items()
for k, v in all_metadata.items()
if k
not in [
"document_id",
@@ -157,142 +122,77 @@ def _process_file(
"file_display_name",
"title",
"connector_type",
"pdf_password",
]
}
source_type_str = metadata.get("connector_type")
source_type = (
DocumentSource(source_type_str) if source_type_str else DocumentSource.FILE
source_type_str = all_metadata.get("connector_type")
source_type = DocumentSource(source_type_str) if source_type_str else None
p_owner_names = all_metadata.get("primary_owners")
s_owner_names = all_metadata.get("secondary_owners")
p_owners = (
[BasicExpertInfo(display_name=name) for name in p_owner_names]
if p_owner_names
else None
)
s_owners = (
[BasicExpertInfo(display_name=name) for name in s_owner_names]
if s_owner_names
else None
)
doc_id = metadata.get("document_id") or f"FILE_CONNECTOR__{file_name}"
title = metadata.get("title") or file_display_name
# 1) If the file itself is an image, handle that scenario quickly
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
if extension in IMAGE_EXTENSIONS:
# Summarize or produce empty doc
image_data = file.read()
image_section, _ = _create_image_section(
llm, image_data, db_session, pg_record.file_name, title
)
return [
Document(
id=doc_id,
sections=[image_section],
source=source_type,
semantic_identifier=file_display_name,
title=title,
doc_updated_at=final_time_updated,
primary_owners=p_owners,
secondary_owners=s_owners,
metadata=metadata_tags,
)
]
# 2) Otherwise: text-based approach. Possibly with embedded images if enabled.
# (For example .docx with inline images).
file.seek(0)
text_content = ""
embedded_images: list[tuple[bytes, str]] = []
text_content, embedded_images = extract_text_and_images(
file=file,
file_name=file_name,
pdf_pass=pdf_pass,
)
# Build sections: first the text as a single Section
sections = []
link_in_meta = metadata.get("link")
if text_content.strip():
sections.append(Section(link=link_in_meta, text=text_content.strip()))
# Then any extracted images from docx, etc.
for idx, (img_data, img_name) in enumerate(embedded_images, start=1):
# Store each embedded image as a separate file in PGFileStore
# and create a section with the image summary
image_section, _ = _create_image_section(
llm,
img_data,
db_session,
pg_record.file_name,
f"{title} - image {idx}",
idx,
)
sections.append(image_section)
return [
Document(
id=doc_id,
sections=sections,
source=source_type,
sections=[
Section(link=all_metadata.get("link"), text=file_content_raw.strip())
],
source=source_type or DocumentSource.FILE,
semantic_identifier=file_display_name,
title=title,
doc_updated_at=final_time_updated,
primary_owners=p_owners,
secondary_owners=s_owners,
# currently metadata just houses tags, other stuff like owners / updated at have dedicated fields
metadata=metadata_tags,
)
]
class LocalFileConnector(LoadConnector, VisionEnabledConnector):
"""
Connector that reads files from Postgres and yields Documents, including
optional embedded image extraction.
"""
class LocalFileConnector(LoadConnector):
def __init__(
self,
file_locations: list[Path | str],
batch_size: int = INDEX_BATCH_SIZE,
) -> None:
self.file_locations = [str(loc) for loc in file_locations]
self.file_locations = [Path(file_location) for file_location in file_locations]
self.batch_size = batch_size
self.pdf_pass: str | None = None
# Initialize vision LLM using the mixin
self.initialize_vision_llm()
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
self.pdf_pass = credentials.get("pdf_password")
return None
def load_from_state(self) -> GenerateDocumentsOutput:
"""
Iterates over each file path, fetches from Postgres, tries to parse text
or images, and yields Document batches.
"""
documents: list[Document] = []
with get_session_with_current_tenant() as db_session:
for file_path in self.file_locations:
current_datetime = datetime.now(timezone.utc)
files_iter = _read_files_and_metadata(
file_name=file_path,
db_session=db_session,
files = _read_files_and_metadata(
file_name=str(file_path), db_session=db_session
)
for actual_file_name, file, metadata in files_iter:
for file_name, file, metadata in files:
metadata["time_updated"] = metadata.get(
"time_updated", current_datetime
)
new_docs = _process_file(
file_name=actual_file_name,
file=file,
metadata=metadata,
pdf_pass=self.pdf_pass,
db_session=db_session,
llm=self.image_analysis_llm,
documents.extend(
_process_file(file_name, file, metadata, self.pdf_pass)
)
documents.extend(new_docs)
if len(documents) >= self.batch_size:
yield documents
documents = []
if documents:
@@ -301,7 +201,7 @@ class LocalFileConnector(LoadConnector, VisionEnabledConnector):
if __name__ == "__main__":
connector = LocalFileConnector(file_locations=[os.environ["TEST_FILE"]])
connector.load_credentials({"pdf_password": os.environ.get("PDF_PASSWORD")})
doc_batches = connector.load_from_state()
for batch in doc_batches:
print("BATCH:", batch)
connector.load_credentials({"pdf_password": os.environ["PDF_PASSWORD"]})
document_batches = connector.load_from_state()
print(next(document_batches))

View File

@@ -4,12 +4,14 @@ from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from typing import Any
from typing import cast
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
from googleapiclient.errors import HttpError # type: ignore
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import MAX_FILE_SIZE_BYTES
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
@@ -34,6 +36,7 @@ from onyx.connectors.google_utils.shared_constants import (
)
from onyx.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR
from onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS
from onyx.connectors.google_utils.shared_constants import SCOPE_DOC_URL
from onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
from onyx.connectors.google_utils.shared_constants import USER_FIELDS
from onyx.connectors.interfaces import GenerateDocumentsOutput
@@ -43,9 +46,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder
@@ -65,10 +66,7 @@ def _extract_ids_from_urls(urls: list[str]) -> list[str]:
def _convert_single_file(
creds: Any,
primary_admin_email: str,
file: dict[str, Any],
image_analysis_llm: LLM | None,
creds: Any, primary_admin_email: str, file: dict[str, Any]
) -> Any:
user_email = file.get("owners", [{}])[0].get("emailAddress") or primary_admin_email
user_drive_service = get_drive_service(creds, user_email=user_email)
@@ -77,14 +75,11 @@ def _convert_single_file(
file=file,
drive_service=user_drive_service,
docs_service=docs_service,
image_analysis_llm=image_analysis_llm, # pass the LLM so doc_conversion can summarize images
)
def _process_files_batch(
files: list[GoogleDriveFileType],
convert_func: Callable[[GoogleDriveFileType], Any],
batch_size: int,
files: list[GoogleDriveFileType], convert_func: Callable, batch_size: int
) -> GenerateDocumentsOutput:
doc_batch = []
with ThreadPoolExecutor(max_workers=min(16, len(files))) as executor:
@@ -116,9 +111,7 @@ def _clean_requested_drive_ids(
return valid_requested_drive_ids, filtered_folder_ids
class GoogleDriveConnector(
LoadConnector, PollConnector, SlimConnector, VisionEnabledConnector
):
class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
include_shared_drives: bool = False,
@@ -136,23 +129,23 @@ class GoogleDriveConnector(
continue_on_failure: bool | None = None,
) -> None:
# Check for old input parameters
if folder_paths is not None:
logger.warning(
"The 'folder_paths' parameter is deprecated. Use 'shared_folder_urls' instead."
if (
folder_paths is not None
or include_shared is not None
or follow_shortcuts is not None
or only_org_public is not None
or continue_on_failure is not None
):
logger.exception(
"Google Drive connector received old input parameters. "
"Please visit the docs for help with the new setup: "
f"{SCOPE_DOC_URL}"
)
if include_shared is not None:
logger.warning(
"The 'include_shared' parameter is deprecated. Use 'include_files_shared_with_me' instead."
raise ConnectorValidationError(
"Google Drive connector received old input parameters. "
"Please visit the docs for help with the new setup: "
f"{SCOPE_DOC_URL}"
)
if follow_shortcuts is not None:
logger.warning("The 'follow_shortcuts' parameter is deprecated.")
if only_org_public is not None:
logger.warning("The 'only_org_public' parameter is deprecated.")
if continue_on_failure is not None:
logger.warning("The 'continue_on_failure' parameter is deprecated.")
# Initialize vision LLM using the mixin
self.initialize_vision_llm()
if (
not include_shared_drives
@@ -244,7 +237,6 @@ class GoogleDriveConnector(
credentials=credentials,
source=DocumentSource.GOOGLE_DRIVE,
)
return new_creds_dict
def _update_traversed_parent_ids(self, folder_id: str) -> None:
@@ -531,53 +523,37 @@ class GoogleDriveConnector(
end: SecondsSinceUnixEpoch | None = None,
) -> GenerateDocumentsOutput:
# Create a larger process pool for file conversion
with ThreadPoolExecutor(max_workers=8) as executor:
# Prepare a partial function with the credentials and admin email
convert_func = partial(
_convert_single_file,
self.creds,
self.primary_admin_email,
image_analysis_llm=self.image_analysis_llm, # Use the mixin's LLM
convert_func = partial(
_convert_single_file, self.creds, self.primary_admin_email
)
# Process files in larger batches
LARGE_BATCH_SIZE = self.batch_size * 4
files_to_process = []
# Gather the files into batches to be processed in parallel
for file in self._fetch_drive_items(is_slim=False, start=start, end=end):
if (
file.get("size")
and int(cast(str, file.get("size"))) > MAX_FILE_SIZE_BYTES
):
logger.warning(
f"Skipping file {file.get('name', 'Unknown')} as it is too large: {file.get('size')} bytes"
)
continue
files_to_process.append(file)
if len(files_to_process) >= LARGE_BATCH_SIZE:
yield from _process_files_batch(
files_to_process, convert_func, self.batch_size
)
files_to_process = []
# Process any remaining files
if files_to_process:
yield from _process_files_batch(
files_to_process, convert_func, self.batch_size
)
# Fetch files in batches
files_batch: list[GoogleDriveFileType] = []
for file in self._fetch_drive_items(is_slim=False, start=start, end=end):
files_batch.append(file)
if len(files_batch) >= self.batch_size:
# Process the batch
futures = [
executor.submit(convert_func, file) for file in files_batch
]
documents = []
for future in as_completed(futures):
try:
doc = future.result()
if doc is not None:
documents.append(doc)
except Exception as e:
logger.error(f"Error converting file: {e}")
if documents:
yield documents
files_batch = []
# Process any remaining files
if files_batch:
futures = [executor.submit(convert_func, file) for file in files_batch]
documents = []
for future in as_completed(futures):
try:
doc = future.result()
if doc is not None:
documents.append(doc)
except Exception as e:
logger.error(f"Error converting file: {e}")
if documents:
yield documents
def load_from_state(self) -> GenerateDocumentsOutput:
try:
yield from self._extract_docs_from_google_drive()
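# Illustrative sketch (an assumption, not part of this diff) of the batch-and-parallel
# pattern used above: collect items into fixed-size batches, convert them concurrently,
# and yield each batch of successful results. `convert` stands in for `convert_func`.
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Callable, Iterable, Iterator

def batched_parallel_convert(
    items: Iterable[Any],
    convert: Callable[[Any], Any],
    batch_size: int,
) -> Iterator[list[Any]]:
    def drain(executor: ThreadPoolExecutor, batch: list[Any]) -> list[Any]:
        results = []
        for future in as_completed([executor.submit(convert, item) for item in batch]):
            try:
                result = future.result()
                if result is not None:
                    results.append(result)
            except Exception as e:
                print(f"Error converting item: {e}")
        return results

    batch: list[Any] = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for item in items:
            batch.append(item)
            if len(batch) >= batch_size:
                if docs := drain(executor, batch):
                    yield docs
                batch = []
        if batch:
            if docs := drain(executor, batch):
                yield docs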

View File

@@ -9,7 +9,7 @@ from googleapiclient.errors import HttpError # type: ignore
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import IGNORE_FOR_QA
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
from onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
from onyx.connectors.google_drive.constants import UNSUPPORTED_FILE_TYPE_CONTENT
@@ -21,88 +21,32 @@ from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.connectors.models import SlimDocument
from onyx.db.engine import get_session_with_current_tenant
from onyx.file_processing.extract_file_text import docx_to_text_and_images
from onyx.file_processing.extract_file_text import docx_to_text
from onyx.file_processing.extract_file_text import pptx_to_text
from onyx.file_processing.extract_file_text import read_pdf_file
from onyx.file_processing.file_validation import is_valid_image_type
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import unstructured_to_text
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
def _summarize_drive_image(
image_data: bytes, image_name: str, image_analysis_llm: LLM | None
) -> str:
"""
Summarize the given image using the provided LLM.
"""
if not image_analysis_llm:
return ""
return (
summarize_image_with_error_handling(
llm=image_analysis_llm,
image_data=image_data,
context_name=image_name,
)
or ""
)
def is_gdrive_image_mime_type(mime_type: str) -> bool:
"""
Return True if the mime_type is a common image type in GDrive.
(e.g. 'image/png', 'image/jpeg')
"""
return is_valid_image_type(mime_type)
# these errors don't represent a failure in the connector, but simply files
# that can't / shouldn't be indexed
ERRORS_TO_CONTINUE_ON = [
"cannotExportFile",
"exportSizeLimitExceeded",
"cannotDownloadFile",
]
def _extract_sections_basic(
file: dict[str, str],
service: GoogleDriveService,
image_analysis_llm: LLM | None = None,
file: dict[str, str], service: GoogleDriveService
) -> list[Section]:
"""
Extends the existing logic to handle either a docx with embedded images
or standalone images (PNG, JPG, etc).
"""
mime_type = file["mimeType"]
link = file["webViewLink"]
file_name = file.get("name", file["id"])
supported_file_types = set(item.value for item in GDriveMimeType)
# 1) If the file is an image, retrieve the raw bytes, optionally summarize
if is_gdrive_image_mime_type(mime_type):
try:
response = service.files().get_media(fileId=file["id"]).execute()
with get_session_with_current_tenant() as db_session:
section, _ = store_image_and_create_section(
db_session=db_session,
image_data=response,
file_name=file["id"],
display_name=file_name,
media_type=mime_type,
llm=image_analysis_llm,
file_origin=FileOrigin.CONNECTOR,
)
return [section]
except Exception as e:
logger.warning(f"Failed to fetch or summarize image: {e}")
return [
Section(
link=link,
text="",
image_file_name=link,
)
]
if mime_type not in supported_file_types:
# Unsupported file types can still have a title; finding it this way is still useful
return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
@@ -241,63 +185,45 @@ def _extract_sections_basic(
GDriveMimeType.PLAIN_TEXT.value,
GDriveMimeType.MARKDOWN.value,
]:
text_data = (
service.files().get_media(fileId=file["id"]).execute().decode("utf-8")
)
return [Section(link=link, text=text_data)]
return [
Section(
link=link,
text=service.files()
.get_media(fileId=file["id"])
.execute()
.decode("utf-8"),
)
]
# ---------------------------
# Word, PowerPoint, PDF files
elif mime_type in [
if mime_type in [
GDriveMimeType.WORD_DOC.value,
GDriveMimeType.POWERPOINT.value,
GDriveMimeType.PDF.value,
]:
response_bytes = service.files().get_media(fileId=file["id"]).execute()
# Optionally use Unstructured
response = service.files().get_media(fileId=file["id"]).execute()
if get_unstructured_api_key():
text = unstructured_to_text(
file=io.BytesIO(response_bytes),
file_name=file_name,
)
return [Section(link=link, text=text)]
return [
Section(
link=link,
text=unstructured_to_text(
file=io.BytesIO(response),
file_name=file.get("name", file["id"]),
),
)
]
if mime_type == GDriveMimeType.WORD_DOC.value:
# Use docx_to_text_and_images to get text plus embedded images
text, embedded_images = docx_to_text_and_images(
file=io.BytesIO(response_bytes),
)
sections = []
if text.strip():
sections.append(Section(link=link, text=text.strip()))
# Process each embedded image using the standardized function
with get_session_with_current_tenant() as db_session:
for idx, (img_data, img_name) in enumerate(
embedded_images, start=1
):
# Create a unique identifier for the embedded image
embedded_id = f"{file['id']}_embedded_{idx}"
section, _ = store_image_and_create_section(
db_session=db_session,
image_data=img_data,
file_name=embedded_id,
display_name=img_name or f"{file_name} - image {idx}",
llm=image_analysis_llm,
file_origin=FileOrigin.CONNECTOR,
)
sections.append(section)
return sections
return [
Section(link=link, text=docx_to_text(file=io.BytesIO(response)))
]
elif mime_type == GDriveMimeType.PDF.value:
text, _pdf_meta, images = read_pdf_file(io.BytesIO(response_bytes))
text, _ = read_pdf_file(file=io.BytesIO(response))
return [Section(link=link, text=text)]
elif mime_type == GDriveMimeType.POWERPOINT.value:
text_data = pptx_to_text(io.BytesIO(response_bytes))
return [Section(link=link, text=text_data)]
return [
Section(link=link, text=pptx_to_text(file=io.BytesIO(response)))
]
# Catch-all case, should not happen since there should be specific handling
# for each of the supported file types
@@ -305,8 +231,7 @@ def _extract_sections_basic(
logger.error(error_message)
raise ValueError(error_message)
except Exception as e:
logger.exception(f"Error extracting sections from file: {e}")
except Exception:
return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
@@ -314,62 +239,74 @@ def convert_drive_item_to_document(
file: GoogleDriveFileType,
drive_service: GoogleDriveService,
docs_service: GoogleDocsService,
image_analysis_llm: LLM | None,
) -> Document | None:
"""
Main entry point for converting a Google Drive file => Document object.
Now we accept an optional `llm` to pass to `_extract_sections_basic`.
"""
try:
# skip shortcuts or folders
if file.get("mimeType") in [DRIVE_SHORTCUT_TYPE, DRIVE_FOLDER_TYPE]:
logger.info("Skipping shortcut/folder.")
# Skip files that are shortcuts
if file.get("mimeType") == DRIVE_SHORTCUT_TYPE:
logger.info("Ignoring Drive Shortcut Filetype")
return None
# Skip files that are folders
if file.get("mimeType") == DRIVE_FOLDER_TYPE:
logger.info("Ignoring Drive Folder Filetype")
return None
# If it's a Google Doc, we might do advanced parsing
sections: list[Section] = []
# Special handling for Google Docs to preserve structure, link
# to headers
if file.get("mimeType") == GDriveMimeType.DOC.value:
try:
# get_document_sections is the advanced approach for Google Docs
sections = get_document_sections(docs_service, file["id"])
except Exception as e:
logger.warning(
f"Failed to pull google doc sections from '{file['name']}': {e}. "
"Falling back to basic extraction."
f"Ran into exception '{e}' when pulling sections from Google Doc '{file['name']}'."
" Falling back to basic extraction."
)
# If not a doc, or if we failed above, do our 'basic' approach
# NOTE: this runs when either (1) the above failed or (2) the file is not a Google Doc
if not sections:
sections = _extract_sections_basic(file, drive_service, image_analysis_llm)
try:
# For all other file types just extract the text
sections = _extract_sections_basic(file, drive_service)
except HttpError as e:
reason = e.error_details[0]["reason"] if e.error_details else e.reason
message = e.error_details[0]["message"] if e.error_details else e.reason
if e.status_code == 403 and reason in ERRORS_TO_CONTINUE_ON:
logger.warning(
f"Could not export file '{file['name']}' due to '{message}', skipping..."
)
return None
raise
if not sections:
return None
doc_id = file["webViewLink"]
updated_time = datetime.fromisoformat(file["modifiedTime"]).astimezone(
timezone.utc
)
return Document(
id=doc_id,
id=file["webViewLink"],
sections=sections,
source=DocumentSource.GOOGLE_DRIVE,
semantic_identifier=file["name"],
doc_updated_at=updated_time,
metadata={}, # or any metadata from 'file'
doc_updated_at=datetime.fromisoformat(file["modifiedTime"]).astimezone(
timezone.utc
),
metadata={}
if any(section.text for section in sections)
else {IGNORE_FOR_QA: "True"},
additional_info=file.get("id"),
)
except Exception as e:
logger.exception(f"Error converting file '{file.get('name')}' to Document: {e}")
if not CONTINUE_ON_CONNECTOR_FAILURE:
raise
raise e
logger.exception("Ran into exception when pulling a file from Google Drive")
return None
def build_slim_document(file: GoogleDriveFileType) -> SlimDocument | None:
# Skip files that are folders or shortcuts
if file.get("mimeType") in [DRIVE_FOLDER_TYPE, DRIVE_SHORTCUT_TYPE]:
return None
return SlimDocument(
id=file["webViewLink"],
perm_sync_data={

View File

@@ -28,8 +28,7 @@ class ConnectorMissingCredentialError(PermissionError):
class Section(BaseModel):
text: str
link: str | None = None
image_file_name: str | None = None
link: str | None
class BasicExpertInfo(BaseModel):

View File

@@ -1,45 +0,0 @@
"""
Mixin for connectors that need vision capabilities.
"""
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.llm.factory import get_default_llm_with_vision
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
class VisionEnabledConnector:
"""
Mixin for connectors that need vision capabilities.
This mixin provides a standard way to initialize a vision-capable LLM
for image analysis during indexing.
Usage:
class MyConnector(LoadConnector, VisionEnabledConnector):
def __init__(self, ...):
super().__init__(...)
self.initialize_vision_llm()
"""
def initialize_vision_llm(self) -> None:
"""
Initialize a vision-capable LLM if enabled by configuration.
Sets self.image_analysis_llm to the LLM instance or None if disabled.
"""
self.image_analysis_llm: LLM | None = None
if get_image_extraction_and_analysis_enabled():
try:
self.image_analysis_llm = get_default_llm_with_vision()
if self.image_analysis_llm is None:
logger.warning(
"No LLM with vision found; image summarization will be disabled"
)
except Exception as e:
logger.warning(
f"Failed to initialize vision LLM due to an error: {str(e)}. "
"Image summarization will be disabled."
)
self.image_analysis_llm = None

View File

@@ -157,7 +157,6 @@ def get_internal_links(
def start_playwright() -> Tuple[Playwright, BrowserContext]:
playwright = sync_playwright().start()
browser = playwright.chromium.launch(headless=True)
context = browser.new_context()
@@ -333,7 +332,7 @@ class WebConnector(LoadConnector):
if initial_url.split(".")[-1] == "pdf":
# PDF files are not checked for links
response = requests.get(initial_url)
page_text, metadata, images = read_pdf_file(
page_text, metadata = read_pdf_file(
file=io.BytesIO(response.content)
)
last_modified = response.headers.get("Last-Modified")

View File

@@ -1,17 +1,12 @@
import base64
from collections.abc import Callable
from collections.abc import Iterator
from typing import cast
import numpy
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import SystemMessage
from onyx.chat.models import SectionRelevancePiece
from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.configs.llm_configs import get_search_time_image_analysis_enabled
from onyx.configs.model_configs import CROSS_ENCODER_RANGE_MAX
from onyx.configs.model_configs import CROSS_ENCODER_RANGE_MIN
from onyx.context.search.enums import LLMEvaluationType
@@ -23,15 +18,11 @@ from onyx.context.search.models import MAX_METRICS_CONTENT
from onyx.context.search.models import RerankingDetails
from onyx.context.search.models import RerankMetricsContainer
from onyx.context.search.models import SearchQuery
from onyx.db.engine import get_session_with_current_tenant
from onyx.document_index.document_index_utils import (
translate_boost_count_to_multiplier,
)
from onyx.file_store.file_store import get_default_file_store
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.natural_language_processing.search_nlp_models import RerankingModel
from onyx.prompts.image_analysis import IMAGE_ANALYSIS_SYSTEM_PROMPT
from onyx.secondary_llm_flows.chunk_usefulness import llm_batch_eval_sections
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import FunctionCall
@@ -39,124 +30,6 @@ from onyx.utils.threadpool_concurrency import run_functions_in_parallel
from onyx.utils.timing import log_function_time
def update_image_sections_with_query(
sections: list[InferenceSection],
query: str,
llm: LLM,
) -> None:
"""
For each chunk in each section that has an image URL, call an LLM to produce
a new 'content' string that directly addresses the user's query about that image.
This implementation uses parallel processing for efficiency.
"""
logger = setup_logger()
logger.debug(f"Starting image section update with query: {query}")
chunks_with_images = []
for section in sections:
for chunk in section.chunks:
if chunk.image_file_name:
chunks_with_images.append(chunk)
if not chunks_with_images:
logger.debug("No images to process in the sections")
return # No images to process
logger.info(f"Found {len(chunks_with_images)} chunks with images to process")
def process_image_chunk(chunk: InferenceChunk) -> tuple[str, str]:
try:
logger.debug(
f"Processing image chunk with ID: {chunk.unique_id}, image: {chunk.image_file_name}"
)
with get_session_with_current_tenant() as db_session:
file_record = get_default_file_store(db_session).read_file(
cast(str, chunk.image_file_name), mode="b"
)
if not file_record:
logger.error(f"Image file not found: {chunk.image_file_name}")
raise Exception("File not found")
file_content = file_record.read()
image_base64 = base64.b64encode(file_content).decode()
logger.debug(
f"Successfully loaded image data for {chunk.image_file_name}"
)
messages: list[BaseMessage] = [
SystemMessage(content=IMAGE_ANALYSIS_SYSTEM_PROMPT),
HumanMessage(
content=[
{
"type": "text",
"text": (
f"The user's question is: '{query}'. "
"Please analyze the following image in that context:\n"
),
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{image_base64}",
},
},
]
),
]
raw_response = llm.invoke(messages)
answer_text = message_to_string(raw_response).strip()
return (
chunk.unique_id,
answer_text if answer_text else "No relevant info found.",
)
except Exception:
logger.exception(
f"Error updating image section with query source image url: {chunk.image_file_name}"
)
return chunk.unique_id, "Error analyzing image."
image_processing_tasks = [
FunctionCall(process_image_chunk, (chunk,)) for chunk in chunks_with_images
]
logger.info(
f"Starting parallel processing of {len(image_processing_tasks)} image tasks"
)
image_processing_results = run_functions_in_parallel(image_processing_tasks)
logger.info(
f"Completed parallel processing with {len(image_processing_results)} results"
)
# Create a mapping of chunk IDs to their processed content
chunk_id_to_content = {}
success_count = 0
for task_id, result in image_processing_results.items():
if result:
chunk_id, content = result
chunk_id_to_content[chunk_id] = content
success_count += 1
else:
logger.error(f"Task {task_id} failed to return a valid result")
logger.info(
f"Successfully processed {success_count}/{len(image_processing_results)} images"
)
# Update the chunks with the processed content
updated_count = 0
for section in sections:
for chunk in section.chunks:
if chunk.unique_id in chunk_id_to_content:
chunk.content = chunk_id_to_content[chunk.unique_id]
updated_count += 1
logger.info(
f"Updated content for {updated_count} chunks with image analysis results"
)
logger = setup_logger()
@@ -413,10 +286,6 @@ def search_postprocessing(
# NOTE: if we don't rerank, we can return the chunks immediately
# since we know this is the final order.
# This way the user experience isn't delayed by the LLM step
if get_search_time_image_analysis_enabled():
update_image_sections_with_query(
retrieved_sections, search_query.query, llm
)
_log_top_section_links(search_query.search_type.value, retrieved_sections)
yield retrieved_sections
sections_yielded = True
@@ -454,13 +323,6 @@ def search_postprocessing(
)
else:
_log_top_section_links(search_query.search_type.value, reranked_sections)
# Add the image processing step here
if get_search_time_image_analysis_enabled():
update_image_sections_with_query(
reranked_sections, search_query.query, llm
)
yield reranked_sections
llm_selected_section_ids = (

View File

@@ -148,28 +148,3 @@ def upsert_pgfilestore(
db_session.commit()
return pgfilestore
def save_bytes_to_pgfilestore(
db_session: Session,
raw_bytes: bytes,
media_type: str,
identifier: str,
display_name: str,
file_origin: FileOrigin = FileOrigin.OTHER,
) -> PGFileStore:
"""
Saves raw bytes to PGFileStore and returns the resulting record.
"""
file_name = f"{file_origin.name.lower()}_{identifier}"
lobj_oid = create_populate_lobj(BytesIO(raw_bytes), db_session)
pgfilestore = upsert_pgfilestore(
file_name=file_name,
display_name=display_name,
file_origin=file_origin,
file_type=media_type,
lobj_oid=lobj_oid,
db_session=db_session,
commit=True,
)
return pgfilestore
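# Hypothetical usage sketch for the helper above (session handling and the raw
# bytes are assumptions): persist image bytes once, then reference the returned
# record's file_name from a Section.
def example_store_image(db_session: Session, image_bytes: bytes) -> str:
    record = save_bytes_to_pgfilestore(
        db_session=db_session,
        raw_bytes=image_bytes,
        media_type="image/png",
        identifier="doc123_embedded_1",  # assumed identifier scheme
        display_name="diagram.png",
        file_origin=FileOrigin.CONNECTOR,
    )
    # the stored name is what downstream Sections/chunks keep as a reference
    return record.file_name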

View File

@@ -1,53 +0,0 @@
import random
from datetime import datetime
from datetime import timedelta
from onyx.configs.constants import MessageType
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_or_create_root_message
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.models import ChatSession
def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
"""Utility function to seed chat history for testing.
num_sessions: the number of sessions to seed
num_messages: the number of messages to seed per session
days: the number of days back from the current time over which to randomize
the timestamps.
"""
with get_session_with_current_tenant() as db_session:
for y in range(0, num_sessions):
create_chat_session(db_session, f"pytest_session_{y}", None, None)
# randomize all session times
rows = db_session.query(ChatSession).all()
for row in rows:
row.time_created = datetime.utcnow() - timedelta(
days=random.randint(0, days)
)
row.time_updated = row.time_created + timedelta(
minutes=random.randint(0, 10)
)
root_message = get_or_create_root_message(row.id, db_session)
for x in range(0, num_messages):
chat_message = create_new_chat_message(
row.id,
root_message,
f"pytest_message_{x}",
None,
0,
MessageType.USER,
db_session,
)
chat_message.time_sent = row.time_created + timedelta(
minutes=random.randint(0, 10)
)
db_session.commit()
db_session.commit()

View File

@@ -55,9 +55,6 @@ schema DANSWER_CHUNK_NAME {
field blurb type string {
indexing: summary | attribute
}
field image_file_name type string {
indexing: summary | attribute
}
# https://docs.vespa.ai/en/attributes.html potential enum store for speed, but probably not worth it
field source_type type string {
indexing: summary | attribute

View File

@@ -31,7 +31,6 @@ from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from onyx.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE
from onyx.document_index.vespa_constants import MAX_OR_CONDITIONS
@@ -131,7 +130,6 @@ def _vespa_hit_to_inference_chunk(
section_continuation=fields[SECTION_CONTINUATION],
document_id=fields[DOCUMENT_ID],
source_type=fields[SOURCE_TYPE],
image_file_name=fields.get(IMAGE_FILE_NAME),
title=fields.get(TITLE),
semantic_identifier=fields[SEMANTIC_IDENTIFIER],
boost=fields.get(BOOST, 1),
@@ -213,7 +211,6 @@ def _get_chunks_via_visit_api(
# Check if the response contains any documents
response_data = response.json()
if "documents" in response_data:
for document in response_data["documents"]:
if filters.access_control_list:

View File

@@ -32,7 +32,6 @@ from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import EMBEDDINGS
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from onyx.document_index.vespa_constants import METADATA
from onyx.document_index.vespa_constants import METADATA_LIST
@@ -199,13 +198,13 @@ def _index_vespa_chunk(
# which only calls VespaIndex.update
ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()},
DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},
IMAGE_FILE_NAME: chunk.image_file_name,
BOOST: chunk.boost,
}
if multitenant:
if chunk.tenant_id:
vespa_document_fields[TENANT_ID] = chunk.tenant_id
vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}"
logger.debug(f'Indexing to URL "{vespa_url}"')
res = http_client.post(

View File

@@ -77,7 +77,6 @@ PRIMARY_OWNERS = "primary_owners"
SECONDARY_OWNERS = "secondary_owners"
RECENCY_BIAS = "recency_bias"
HIDDEN = "hidden"
IMAGE_FILE_NAME = "image_file_name"
# Specific to Vespa, needed for highlighting matching keywords / section
CONTENT_SUMMARY = "content_summary"
@@ -95,7 +94,6 @@ YQL_BASE = (
f"{SEMANTIC_IDENTIFIER}, "
f"{TITLE}, "
f"{SECTION_CONTINUATION}, "
f"{IMAGE_FILE_NAME}, "
f"{BOOST}, "
f"{HIDDEN}, "
f"{DOC_UPDATED_AT}, "

View File

@@ -9,17 +9,15 @@ from email.parser import Parser as EmailParser
from io import BytesIO
from pathlib import Path
from typing import Any
from typing import Dict
from typing import IO
from typing import List
from typing import Tuple
import chardet
import docx # type: ignore
import openpyxl # type: ignore
import pptx # type: ignore
from docx import Document as DocxDocument
from docx import Document
from fastapi import UploadFile
from PIL import Image
from pypdf import PdfReader
from pypdf.errors import PdfStreamError
@@ -33,8 +31,10 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
TEXT_SECTION_SEPARATOR = "\n\n"
PLAIN_TEXT_FILE_EXTENSIONS = [
".txt",
".md",
@@ -49,6 +49,7 @@ PLAIN_TEXT_FILE_EXTENSIONS = [
".yaml",
]
VALID_FILE_EXTENSIONS = PLAIN_TEXT_FILE_EXTENSIONS + [
".pdf",
".docx",
@@ -57,16 +58,6 @@ VALID_FILE_EXTENSIONS = PLAIN_TEXT_FILE_EXTENSIONS + [
".eml",
".epub",
".html",
".png",
".jpg",
".jpeg",
".webp",
]
IMAGE_MEDIA_TYPES = [
"image/png",
"image/jpeg",
"image/webp",
]
@@ -76,13 +67,11 @@ def is_text_file_extension(file_name: str) -> bool:
def get_file_ext(file_path_or_name: str | Path) -> str:
_, extension = os.path.splitext(file_path_or_name)
# standardize all extensions to be lowercase so that checks against
# VALID_FILE_EXTENSIONS and similar will work as intended
return extension.lower()
def is_valid_media_type(media_type: str) -> bool:
return media_type in IMAGE_MEDIA_TYPES
def is_valid_file_ext(ext: str) -> bool:
return ext in VALID_FILE_EXTENSIONS
@@ -90,18 +79,17 @@ def is_valid_file_ext(ext: str) -> bool:
def is_text_file(file: IO[bytes]) -> bool:
"""
checks if the first 1024 bytes only contain printable or whitespace characters
if it does, then we say it's a plaintext file
if it does, then we say its a plaintext file
"""
raw_data = file.read(1024)
file.seek(0)
text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
return all(c in text_chars for c in raw_data)
def detect_encoding(file: IO[bytes]) -> str:
raw_data = file.read(50000)
file.seek(0)
encoding = chardet.detect(raw_data)["encoding"] or "utf-8"
file.seek(0)
return encoding
@@ -111,14 +99,14 @@ def is_macos_resource_fork_file(file_name: str) -> bool:
)
# To include additional metadata in the search index, add a .onyx_metadata.json file
# to the zip file. This file should contain a list of objects with the following format:
# [{ "filename": "file1.txt", "link": "https://example.com/file1.txt" }]
def load_files_from_zip(
zip_file_io: IO,
ignore_macos_resource_fork_files: bool = True,
ignore_dirs: bool = True,
) -> Iterator[tuple[zipfile.ZipInfo, IO[Any], dict[str, Any]]]:
"""
If there's a .onyx_metadata.json in the zip, attach those metadata to each subfile.
"""
with zipfile.ZipFile(zip_file_io, "r") as zip_file:
zip_metadata = {}
try:
@@ -130,31 +118,24 @@ def load_files_from_zip(
# convert list of dicts to dict of dicts
zip_metadata = {d["filename"]: d for d in zip_metadata}
except json.JSONDecodeError:
logger.warning(f"Unable to load {DANSWER_METADATA_FILENAME}")
logger.warn(f"Unable to load {DANSWER_METADATA_FILENAME}")
except KeyError:
logger.info(f"No {DANSWER_METADATA_FILENAME} file")
for file_info in zip_file.infolist():
if ignore_dirs and file_info.is_dir():
continue
with zip_file.open(file_info.filename, "r") as file:
if ignore_dirs and file_info.is_dir():
continue
if (
ignore_macos_resource_fork_files
and is_macos_resource_fork_file(file_info.filename)
) or file_info.filename == DANSWER_METADATA_FILENAME:
continue
with zip_file.open(file_info.filename, "r") as subfile:
yield file_info, subfile, zip_metadata.get(file_info.filename, {})
if (
ignore_macos_resource_fork_files
and is_macos_resource_fork_file(file_info.filename)
) or file_info.filename == DANSWER_METADATA_FILENAME:
continue
yield file_info, file, zip_metadata.get(file_info.filename, {})
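# Sketch of the metadata convention described above (assumes
# DANSWER_METADATA_FILENAME == ".onyx_metadata.json"): build a zip whose
# per-file metadata load_files_from_zip attaches to each yielded subfile.
import io
import json
import zipfile

def build_zip_with_metadata() -> io.BytesIO:
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr("file1.txt", "hello world")
        zf.writestr(
            ".onyx_metadata.json",
            json.dumps([{"filename": "file1.txt", "link": "https://example.com/file1.txt"}]),
        )
    buf.seek(0)
    return buf

# for file_info, subfile, metadata in load_files_from_zip(build_zip_with_metadata()):
#     print(file_info.filename, metadata.get("link"))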
def _extract_onyx_metadata(line: str) -> dict | None:
"""
Example: first line has:
<!-- DANSWER_METADATA={"title": "..."} -->
or
#DANSWER_METADATA={"title":"..."}
"""
html_comment_pattern = r"<!--\s*DANSWER_METADATA=\{(.*?)\}\s*-->"
hashtag_pattern = r"#DANSWER_METADATA=\{(.*?)\}"
@@ -180,13 +161,9 @@ def read_text_file(
errors: str = "replace",
ignore_onyx_metadata: bool = True,
) -> tuple[str, dict]:
"""
For plain text files. Optionally extracts Onyx metadata from the first line.
"""
metadata = {}
file_content_raw = ""
for ind, line in enumerate(file):
# decode
try:
line = line.decode(encoding) if isinstance(line, bytes) else line
except UnicodeDecodeError:
@@ -196,132 +173,131 @@ def read_text_file(
else line
)
# optionally parse metadata in the first line
if ind == 0 and not ignore_onyx_metadata:
potential_meta = _extract_onyx_metadata(line)
if potential_meta is not None:
metadata = potential_meta
continue
file_content_raw += line
if ind == 0:
metadata_or_none = (
None if ignore_onyx_metadata else _extract_onyx_metadata(line)
)
if metadata_or_none is not None:
metadata = metadata_or_none
else:
file_content_raw += line
else:
file_content_raw += line
return file_content_raw, metadata
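# Sketch of the first-line metadata convention handled above: with
# ignore_onyx_metadata=False, a leading DANSWER_METADATA line is parsed into the
# returned metadata dict instead of the content (expected values are approximate).
import io

sample = io.BytesIO(
    b'#DANSWER_METADATA={"title": "My Doc"}\n'
    b"actual file contents\n"
)
# content, meta = read_text_file(sample, ignore_onyx_metadata=False)
# content -> "actual file contents\n"
# meta    -> {"title": "My Doc"}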
def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
"""
Extract text from a PDF. For embedded images, a more complex approach is needed.
This is a minimal approach returning text only.
"""
text, _, _ = read_pdf_file(file, pdf_pass)
"""Extract text from a PDF file."""
# Return only the extracted text from read_pdf_file
text, _ = read_pdf_file(file, pdf_pass)
return text
def read_pdf_file(
file: IO[Any], pdf_pass: str | None = None, extract_images: bool = False
) -> tuple[str, dict, list[tuple[bytes, str]]]:
"""
Returns the text, basic PDF metadata, and optionally extracted images.
"""
metadata: dict[str, Any] = {}
extracted_images: list[tuple[bytes, str]] = []
file: IO[Any],
pdf_pass: str | None = None,
) -> tuple[str, dict]:
metadata: Dict[str, Any] = {}
try:
pdf_reader = PdfReader(file)
# If marked as encrypted and a password is provided, try to decrypt
if pdf_reader.is_encrypted and pdf_pass is not None:
decrypt_success = False
try:
decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
except Exception:
logger.error("Unable to decrypt pdf")
if pdf_pass is not None:
try:
decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
except Exception:
logger.error("Unable to decrypt pdf")
if not decrypt_success:
return "", metadata, []
# By user request, keep files that are unreadable just so they
# can be discoverable by title.
return "", metadata
elif pdf_reader.is_encrypted:
logger.warning("No Password for an encrypted PDF, returning empty text.")
return "", metadata, []
logger.warning("No Password available to decrypt pdf, returning empty")
return "", metadata
# Basic PDF metadata
# Extract metadata from the PDF, removing leading '/' from keys if present
# This standardizes the metadata keys for consistency
metadata = {}
if pdf_reader.metadata is not None:
for key, value in pdf_reader.metadata.items():
clean_key = key.lstrip("/")
if isinstance(value, str) and value.strip():
metadata[clean_key] = value
elif isinstance(value, list) and all(
isinstance(item, str) for item in value
):
metadata[clean_key] = ", ".join(value)
text = TEXT_SECTION_SEPARATOR.join(
page.extract_text() for page in pdf_reader.pages
return (
TEXT_SECTION_SEPARATOR.join(
page.extract_text() for page in pdf_reader.pages
),
metadata,
)
if extract_images:
for page_num, page in enumerate(pdf_reader.pages):
for image_file_object in page.images:
image = Image.open(io.BytesIO(image_file_object.data))
img_byte_arr = io.BytesIO()
image.save(img_byte_arr, format=image.format)
img_bytes = img_byte_arr.getvalue()
image_name = (
f"page_{page_num + 1}_image_{image_file_object.name}."
f"{image.format.lower() if image.format else 'png'}"
)
extracted_images.append((img_bytes, image_name))
return text, metadata, extracted_images
except PdfStreamError:
logger.exception("Invalid PDF file")
logger.exception("PDF file is not a valid PDF")
except Exception:
logger.exception("Failed to read PDF")
return "", metadata, []
# File is still discoverable by title
# but the contents are not included as they cannot be parsed
return "", metadata
def docx_to_text_and_images(
file: IO[Any],
) -> Tuple[str, List[Tuple[bytes, str]]]:
"""
Extract text from a docx, along with any inline images.
Returns (text_content, list_of_images).
"""
def docx_to_text(file: IO[Any]) -> str:
def is_simple_table(table: docx.table.Table) -> bool:
for row in table.rows:
# No omitted cells
if row.grid_cols_before > 0 or row.grid_cols_after > 0:
return False
# No nested tables
if any(cell.tables for cell in row.cells):
return False
return True
def extract_cell_text(cell: docx.table._Cell) -> str:
cell_paragraphs = [para.text.strip() for para in cell.paragraphs]
return " ".join(p for p in cell_paragraphs if p) or "N/A"
paragraphs = []
embedded_images: List[Tuple[bytes, str]] = []
doc = docx.Document(file)
for item in doc.iter_inner_content():
if isinstance(item, docx.text.paragraph.Paragraph):
paragraphs.append(item.text)
# Grab text from paragraphs
for paragraph in doc.paragraphs:
paragraphs.append(paragraph.text)
elif isinstance(item, docx.table.Table):
if not item.rows or not is_simple_table(item):
continue
# Reset position so we can re-load the doc (python-docx has read the stream)
# Note: if python-docx has fully consumed the stream, you may need to open it again from memory.
# For large docs, a more robust approach is needed.
# This is a simplified example.
# Every row is a new line, joined with a single newline
table_content = "\n".join(
[
",\t".join(extract_cell_text(cell) for cell in row.cells)
for row in item.rows
]
)
paragraphs.append(table_content)
for rel_id, rel in doc.part.rels.items():
if "image" in rel.reltype:
# image is typically in rel.target_part.blob
image_bytes = rel.target_part.blob
image_name = rel.target_part.partname
# store
embedded_images.append((image_bytes, os.path.basename(str(image_name))))
text_content = "\n".join(paragraphs)
return text_content, embedded_images
# Docx already has good spacing between paragraphs
return "\n".join(paragraphs)
def pptx_to_text(file: IO[Any]) -> str:
presentation = pptx.Presentation(file)
text_content = []
for slide_number, slide in enumerate(presentation.slides, start=1):
slide_text = f"\nSlide {slide_number}:\n"
extracted_text = f"\nSlide {slide_number}:\n"
for shape in slide.shapes:
if hasattr(shape, "text"):
slide_text += shape.text + "\n"
text_content.append(slide_text)
extracted_text += shape.text + "\n"
text_content.append(extracted_text)
return TEXT_SECTION_SEPARATOR.join(text_content)
@@ -329,21 +305,18 @@ def xlsx_to_text(file: IO[Any]) -> str:
workbook = openpyxl.load_workbook(file, read_only=True)
text_content = []
for sheet in workbook.worksheets:
rows = []
for row in sheet.iter_rows(min_row=1, values_only=True):
row_str = ",".join(str(cell) if cell is not None else "" for cell in row)
rows.append(row_str)
sheet_str = "\n".join(rows)
text_content.append(sheet_str)
sheet_string = "\n".join(
",".join(map(str, row))
for row in sheet.iter_rows(min_row=1, values_only=True)
)
text_content.append(sheet_string)
return TEXT_SECTION_SEPARATOR.join(text_content)
def eml_to_text(file: IO[Any]) -> str:
encoding = detect_encoding(file)
text_file = io.TextIOWrapper(file, encoding=encoding)
text_file = io.TextIOWrapper(file, encoding=detect_encoding(file))
parser = EmailParser()
message = parser.parse(text_file)
text_content = []
for part in message.walk():
if part.get_content_type().startswith("text/plain"):
@@ -369,8 +342,8 @@ def epub_to_text(file: IO[Any]) -> str:
def file_io_to_text(file: IO[Any]) -> str:
encoding = detect_encoding(file)
file_content, _ = read_text_file(file, encoding=encoding)
return file_content
file_content_raw, _ = read_text_file(file, encoding=encoding)
return file_content_raw
def extract_file_text(
@@ -379,13 +352,9 @@ def extract_file_text(
break_on_unprocessable: bool = True,
extension: str | None = None,
) -> str:
"""
Legacy function that returns *only text*, ignoring embedded images.
For backward-compatibility in code that only wants text.
"""
extension_to_function: dict[str, Callable[[IO[Any]], str]] = {
".pdf": pdf_to_text,
".docx": lambda f: docx_to_text_and_images(f)[0], # no images
".docx": docx_to_text,
".pptx": pptx_to_text,
".xlsx": xlsx_to_text,
".eml": eml_to_text,
@@ -399,23 +368,24 @@ def extract_file_text(
return unstructured_to_text(file, file_name)
except Exception as unstructured_error:
logger.error(
f"Failed to process with Unstructured: {str(unstructured_error)}. "
"Falling back to normal processing."
f"Failed to process with Unstructured: {str(unstructured_error)}. Falling back to normal processing."
)
if extension is None:
extension = get_file_ext(file_name)
# Fall through to normal processing
final_extension: str
if file_name or extension:
if extension is not None:
final_extension = extension
elif file_name is not None:
final_extension = get_file_ext(file_name)
if is_valid_file_ext(extension):
func = extension_to_function.get(extension, file_io_to_text)
file.seek(0)
return func(file)
if is_valid_file_ext(final_extension):
return extension_to_function.get(final_extension, file_io_to_text)(file)
# If unknown extension, maybe it's a text file
file.seek(0)
# Either the file somehow has no name or the extension is not one that we recognize
if is_text_file(file):
return file_io_to_text(file)
raise ValueError("Unknown file extension or not recognized as text data")
raise ValueError("Unknown file extension and unknown text encoding")
except Exception as e:
if break_on_unprocessable:
@@ -426,93 +396,20 @@ def extract_file_text(
return ""
def extract_text_and_images(
file: IO[Any],
file_name: str,
pdf_pass: str | None = None,
) -> Tuple[str, List[Tuple[bytes, str]]]:
"""
Primary new function for the updated connector.
Returns (text_content, [(embedded_img_bytes, embedded_img_name), ...]).
"""
try:
# Attempt unstructured if env var is set
if get_unstructured_api_key():
# If the user doesn't want embedded images, unstructured is fine
file.seek(0)
text_content = unstructured_to_text(file, file_name)
return (text_content, [])
extension = get_file_ext(file_name)
# docx example for embedded images
if extension == ".docx":
file.seek(0)
text_content, images = docx_to_text_and_images(file)
return (text_content, images)
# PDF example: we do not show complicated PDF image extraction here
# so we simply extract text for now and skip images.
if extension == ".pdf":
file.seek(0)
text_content, _, images = read_pdf_file(file, pdf_pass, extract_images=True)
return (text_content, images)
# For PPTX, XLSX, EML, etc., we do not show embedded image logic here.
# You can do something similar to docx if needed.
if extension == ".pptx":
file.seek(0)
return (pptx_to_text(file), [])
if extension == ".xlsx":
file.seek(0)
return (xlsx_to_text(file), [])
if extension == ".eml":
file.seek(0)
return (eml_to_text(file), [])
if extension == ".epub":
file.seek(0)
return (epub_to_text(file), [])
if extension == ".html":
file.seek(0)
return (parse_html_page_basic(file), [])
# If we reach here and it's a recognized text extension
if is_text_file_extension(file_name):
file.seek(0)
encoding = detect_encoding(file)
text_content_raw, _ = read_text_file(
file, encoding=encoding, ignore_onyx_metadata=False
)
return (text_content_raw, [])
# If it's an image file or something else, we do not parse embedded images from them
# just return empty text
file.seek(0)
return ("", [])
except Exception as e:
logger.exception(f"Failed to extract text/images from {file_name}: {e}")
return ("", [])
def convert_docx_to_txt(
file: UploadFile, file_store: FileStore, file_path: str
) -> None:
"""
Helper to convert docx to a .txt file in the same filestore.
"""
file.file.seek(0)
docx_content = file.file.read()
doc = DocxDocument(BytesIO(docx_content))
doc = Document(BytesIO(docx_content))
# Extract text from the document
all_paras = [p.text for p in doc.paragraphs]
text_content = "\n".join(all_paras)
full_text = []
for para in doc.paragraphs:
full_text.append(para.text)
# Join the extracted text
text_content = "\n".join(full_text)
txt_file_path = docx_to_txt_filename(file_path)
file_store.save_file(
@@ -525,4 +422,7 @@ def convert_docx_to_txt(
def docx_to_txt_filename(file_path: str) -> str:
"""
Convert a .docx file path to its corresponding .txt file path.
"""
return file_path.rsplit(".", 1)[0] + ".txt"

View File

@@ -1,46 +0,0 @@
"""
Centralized file type validation utilities.
"""
# Standard image MIME types supported by most vision LLMs
IMAGE_MIME_TYPES = [
"image/png",
"image/jpeg",
"image/jpg",
"image/webp",
]
# Image types that should be excluded from processing
EXCLUDED_IMAGE_TYPES = [
"image/bmp",
"image/tiff",
"image/gif",
"image/svg+xml",
]
def is_valid_image_type(mime_type: str) -> bool:
"""
Check if mime_type is a valid image type.
Args:
mime_type: The MIME type to check
Returns:
True if the MIME type is a valid image type, False otherwise
"""
if not mime_type:
return False
return mime_type.startswith("image/") and mime_type not in EXCLUDED_IMAGE_TYPES
def is_supported_by_vision_llm(mime_type: str) -> bool:
"""
Check if this image type can be processed by vision LLMs.
Args:
mime_type: The MIME type to check
Returns:
True if the MIME type is supported by vision LLMs, False otherwise
"""
return mime_type in IMAGE_MIME_TYPES
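# Quick sanity checks for the helpers above (expected values follow directly from
# the lists defined in this module):
assert is_valid_image_type("image/png") is True
assert is_valid_image_type("image/gif") is False  # explicitly excluded
assert is_supported_by_vision_llm("image/webp") is True
assert is_supported_by_vision_llm("image/tiff") is False  # not in IMAGE_MIME_TYPES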

View File

@@ -1,129 +0,0 @@
import base64
from io import BytesIO
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import SystemMessage
from PIL import Image
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.prompts.image_analysis import IMAGE_SUMMARIZATION_SYSTEM_PROMPT
from onyx.prompts.image_analysis import IMAGE_SUMMARIZATION_USER_PROMPT
from onyx.utils.logger import setup_logger
logger = setup_logger()
def prepare_image_bytes(image_data: bytes) -> str:
"""Prepare image bytes for summarization.
Resizes image if it's larger than 20MB. Encodes image as a base64 string."""
image_data = _resize_image_if_needed(image_data)
# encode image (base64)
encoded_image = _encode_image_for_llm_prompt(image_data)
return encoded_image
def summarize_image_pipeline(
llm: LLM,
image_data: bytes,
query: str | None = None,
system_prompt: str | None = None,
) -> str:
"""Pipeline to generate a summary of an image.
Resizes the image if it is bigger than 20MB, encodes it as a base64 string,
and finally uses the default LLM to generate a textual summary of the image."""
# resize image if it's bigger than 20MB
encoded_image = prepare_image_bytes(image_data)
summary = _summarize_image(
encoded_image,
llm,
query,
system_prompt,
)
return summary
def summarize_image_with_error_handling(
llm: LLM | None,
image_data: bytes,
context_name: str,
system_prompt: str = IMAGE_SUMMARIZATION_SYSTEM_PROMPT,
user_prompt_template: str = IMAGE_SUMMARIZATION_USER_PROMPT,
) -> str | None:
"""Wrapper function that handles error cases and configuration consistently.
Args:
llm: The LLM with vision capabilities to use for summarization
image_data: The raw image bytes
context_name: Name or title of the image for context
system_prompt: System prompt to use for the LLM
user_prompt_template: Template for the user prompt, should contain {title} placeholder
Returns:
The image summary text, or None if summarization failed or is disabled
"""
if llm is None:
return None
user_prompt = user_prompt_template.format(title=context_name)
return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
def _summarize_image(
encoded_image: str,
llm: LLM,
query: str | None = None,
system_prompt: str | None = None,
) -> str:
"""Use default LLM (if it is multimodal) to generate a summary of an image."""
messages: list[BaseMessage] = []
if system_prompt:
messages.append(SystemMessage(content=system_prompt))
messages.append(
HumanMessage(
content=[
{"type": "text", "text": query},
{"type": "image_url", "image_url": {"url": encoded_image}},
],
),
)
try:
return message_to_string(llm.invoke(messages))
except Exception as e:
raise ValueError(f"Summarization failed. Messages: {messages}") from e
def _encode_image_for_llm_prompt(image_data: bytes) -> str:
"""Getting the base64 string."""
base64_encoded_data = base64.b64encode(image_data).decode("utf-8")
return f"data:image/jpeg;base64,{base64_encoded_data}"
def _resize_image_if_needed(image_data: bytes, max_size_mb: int = 20) -> bytes:
"""Resize image if it's larger than the specified max size in MB."""
max_size_bytes = max_size_mb * 1024 * 1024
if len(image_data) > max_size_bytes:
with Image.open(BytesIO(image_data)) as img:
# Reduce dimensions for better size reduction
img.thumbnail((1024, 1024), Image.Resampling.LANCZOS)
output = BytesIO()
# Save with lower quality for compression
img.save(output, format="JPEG", quality=85)
resized_data = output.getvalue()
return resized_data
return image_data
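# Sketch of the resize-then-encode path used by prepare_image_bytes, exercised with
# a small synthetic image (Pillow only; no LLM call involved):
from io import BytesIO
from PIL import Image

img = Image.new("RGB", (64, 64), color="red")
buf = BytesIO()
img.save(buf, format="JPEG")
encoded = prepare_image_bytes(buf.getvalue())
assert encoded.startswith("data:image/jpeg;base64,")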

View File

@@ -1,70 +0,0 @@
from typing import Tuple
from sqlalchemy.orm import Session
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.constants import FileOrigin
from onyx.connectors.models import Section
from onyx.db.pg_file_store import save_bytes_to_pgfilestore
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
def store_image_and_create_section(
db_session: Session,
image_data: bytes,
file_name: str,
display_name: str,
media_type: str = "image/unknown",
llm: LLM | None = None,
file_origin: FileOrigin = FileOrigin.OTHER,
) -> Tuple[Section, str | None]:
"""
Stores an image in PGFileStore and creates a Section object with optional summarization.
Args:
db_session: Database session
image_data: Raw image bytes
file_name: Base identifier for the file
display_name: Human-readable name for the image
media_type: MIME type of the image
llm: Optional LLM with vision capabilities for summarization
file_origin: Origin of the file (e.g., CONFLUENCE, GOOGLE_DRIVE, etc.)
Returns:
Tuple containing:
- Section object with image reference and optional summary text
- The file_name in PGFileStore or None if storage failed
"""
# Storage logic
stored_file_name = None
try:
pgfilestore = save_bytes_to_pgfilestore(
db_session=db_session,
raw_bytes=image_data,
media_type=media_type,
identifier=file_name,
display_name=display_name,
file_origin=file_origin,
)
stored_file_name = pgfilestore.file_name
except Exception as e:
logger.error(f"Failed to store image: {e}")
if not CONTINUE_ON_CONNECTOR_FAILURE:
raise
return Section(text=""), None
# Summarization logic
summary_text = ""
if llm:
summary_text = (
summarize_image_with_error_handling(llm, image_data, display_name) or ""
)
return (
Section(text=summary_text, image_file_name=stored_file_name),
stored_file_name,
)
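# Hypothetical usage sketch for the helper above (the session and image bytes are
# assumptions; with llm=None the image is stored but the summary text stays empty):
def example_image_section(db_session: Session, image_bytes: bytes) -> Section:
    section, stored_name = store_image_and_create_section(
        db_session=db_session,
        image_data=image_bytes,
        file_name="doc123_embedded_1",  # assumed identifier scheme
        display_name="architecture diagram",
        media_type="image/png",
        llm=None,
        file_origin=FileOrigin.CONNECTOR,
    )
    # section.image_file_name == stored_name when storage succeeded
    return section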

View File

@@ -23,9 +23,12 @@ from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
CHUNK_OVERLAP = 0
# Fairly arbitrary numbers but the general concept is we don't want the title/metadata to
# overwhelm the actual contents of the chunk
# For example, in a rare case the metadata could take 128 tokens of a 512-token chunk and
# the title prefix another 128 tokens, leaving only 256 for the actual contents
MAX_METADATA_PERCENTAGE = 0.25
CHUNK_MIN_CONTENT = 256
logger = setup_logger()
@@ -33,8 +36,16 @@ def _get_metadata_suffix_for_document_index(
metadata: dict[str, str | list[str]], include_separator: bool = False
) -> tuple[str, str]:
"""
Returns the metadata as a natural language string representation with all of the keys and values
for the vector embedding and a string of all of the values for the keyword search.
Returns the metadata as a natural language string representation with all of the keys and values for the vector embedding
and a string of all of the values for the keyword search
For example, if we have the following metadata:
{
"author": "John Doe",
"space": "Engineering"
}
The vector embedding string should include the relation between the key and value, whereas for keyword search we only
want "John Doe" and "Engineering". The keys are repeated and much more noisy.
"""
if not metadata:
return "", ""
@@ -63,17 +74,12 @@ def _get_metadata_suffix_for_document_index(
def _combine_chunks(chunks: list[DocAwareChunk], large_chunk_id: int) -> DocAwareChunk:
"""
Combines multiple DocAwareChunks into one large chunk (for “multipass” mode),
appending the content and adjusting source_links accordingly.
"""
merged_chunk = DocAwareChunk(
source_document=chunks[0].source_document,
chunk_id=chunks[0].chunk_id,
blurb=chunks[0].blurb,
content=chunks[0].content,
source_links=chunks[0].source_links or {},
image_file_name=None,
section_continuation=(chunks[0].chunk_id > 0),
title_prefix=chunks[0].title_prefix,
metadata_suffix_semantic=chunks[0].metadata_suffix_semantic,
@@ -97,9 +103,6 @@ def _combine_chunks(chunks: list[DocAwareChunk], large_chunk_id: int) -> DocAwar
def generate_large_chunks(chunks: list[DocAwareChunk]) -> list[DocAwareChunk]:
"""
Generates larger “grouped” chunks by combining sets of smaller chunks.
"""
large_chunks = []
for idx, i in enumerate(range(0, len(chunks), LARGE_CHUNK_RATIO)):
chunk_group = chunks[i : i + LARGE_CHUNK_RATIO]
@@ -169,60 +172,23 @@ class Chunker:
while start < total_tokens:
end = min(start + content_token_limit, total_tokens)
token_chunk = tokens[start:end]
# Join the tokens to reconstruct the text
chunk_text = " ".join(token_chunk)
chunks.append(chunk_text)
start = end
return chunks
def _extract_blurb(self, text: str) -> str:
"""
Extract a short blurb from the text (first chunk of size `blurb_size`).
"""
texts = self.blurb_splitter.split_text(text)
if not texts:
return ""
return texts[0]
def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:
"""
For “multipass” mode: additional sub-chunks (mini-chunks) for use in certain embeddings.
"""
if self.mini_chunk_splitter and chunk_text.strip():
return self.mini_chunk_splitter.split_text(chunk_text)
return None
# ADDED: extra param image_file_name to store in the chunk
def _create_chunk(
self,
document: Document,
chunks_list: list[DocAwareChunk],
text: str,
links: dict[int, str],
is_continuation: bool = False,
title_prefix: str = "",
metadata_suffix_semantic: str = "",
metadata_suffix_keyword: str = "",
image_file_name: str | None = None,
) -> None:
"""
Helper to create a new DocAwareChunk, append it to chunks_list.
"""
new_chunk = DocAwareChunk(
source_document=document,
chunk_id=len(chunks_list),
blurb=self._extract_blurb(text),
content=text,
source_links=links or {0: ""},
image_file_name=image_file_name,
section_continuation=is_continuation,
title_prefix=title_prefix,
metadata_suffix_semantic=metadata_suffix_semantic,
metadata_suffix_keyword=metadata_suffix_keyword,
mini_chunk_texts=self._get_mini_chunk_texts(text),
large_chunk_id=None,
)
chunks_list.append(new_chunk)
def _chunk_document(
self,
document: Document,
@@ -232,156 +198,122 @@ class Chunker:
content_token_limit: int,
) -> list[DocAwareChunk]:
"""
Loops through sections of the document, converting them into one or more chunks.
If a section has an image_link, we treat it as a dedicated chunk.
Loops through sections of the document, adds metadata and converts them into chunks.
"""
chunks: list[DocAwareChunk] = []
link_offsets: dict[int, str] = {}
chunk_text = ""
def _create_chunk(
text: str,
links: dict[int, str],
is_continuation: bool = False,
) -> DocAwareChunk:
return DocAwareChunk(
source_document=document,
chunk_id=len(chunks),
blurb=self._extract_blurb(text),
content=text,
source_links=links or {0: ""},
section_continuation=is_continuation,
title_prefix=title_prefix,
metadata_suffix_semantic=metadata_suffix_semantic,
metadata_suffix_keyword=metadata_suffix_keyword,
mini_chunk_texts=self._get_mini_chunk_texts(text),
large_chunk_id=None,
)
section_link_text: str
for section_idx, section in enumerate(document.sections):
section_text = clean_text(section.text)
section_link_text = section.link or ""
# ADDED: if the Section has an image link
image_url = section.image_file_name
# If there is no useful content, skip
# If there is no useful content, not even the title, just drop it
if not section_text and (not document.title or section_idx > 0):
# If a section is empty and the document has no title, we can just drop it. We return a list of
# DocAwareChunks where each one contains the necessary information needed down the line for indexing.
# There is no concern about dropping whole documents from this list; it should not cause any indexing failures.
logger.warning(
f"Skipping empty or irrelevant section in doc "
f"{document.semantic_identifier}, link={section_link_text}"
f"Skipping section {section.text} from document "
f"{document.semantic_identifier} due to empty text after cleaning "
f"with link {section_link_text}"
)
continue
# CASE 1: If this is an image section, force a separate chunk
if image_url:
# First, if we have any partially built text chunk, finalize it
if chunk_text.strip():
self._create_chunk(
document,
chunks,
chunk_text,
link_offsets,
is_continuation=False,
title_prefix=title_prefix,
metadata_suffix_semantic=metadata_suffix_semantic,
metadata_suffix_keyword=metadata_suffix_keyword,
)
chunk_text = ""
link_offsets = {}
# Create a chunk specifically for this image
# (If the section has text describing the image, use that as content)
self._create_chunk(
document,
chunks,
section_text,
links={0: section_link_text}
if section_link_text
else {}, # No text offsets needed for images
image_file_name=image_url,
title_prefix=title_prefix,
metadata_suffix_semantic=metadata_suffix_semantic,
metadata_suffix_keyword=metadata_suffix_keyword,
)
# Continue to next section
continue
# CASE 2: Normal text section
section_token_count = len(self.tokenizer.tokenize(section_text))
# If the section is large on its own, split it separately
# Large sections are considered self-contained/unique
# Therefore, they start a new chunk and are not concatenated
# at the end by other sections
if section_token_count > content_token_limit:
if chunk_text.strip():
self._create_chunk(
document,
chunks,
chunk_text,
link_offsets,
False,
title_prefix,
metadata_suffix_semantic,
metadata_suffix_keyword,
)
chunk_text = ""
if chunk_text:
chunks.append(_create_chunk(chunk_text, link_offsets))
link_offsets = {}
chunk_text = ""
split_texts = self.chunk_splitter.split_text(section_text)
for i, split_text in enumerate(split_texts):
# If even the split_text is bigger than strict limit, further split
if (
STRICT_CHUNK_TOKEN_LIMIT
# Tokenizer only runs if STRICT_CHUNK_TOKEN_LIMIT is true
and len(self.tokenizer.tokenize(split_text)) > content_token_limit
):
# If STRICT_CHUNK_TOKEN_LIMIT is true, manually check
# the token count of each split text to ensure it is
# not larger than the content_token_limit
smaller_chunks = self._split_oversized_chunk(
split_text, content_token_limit
)
for j, small_chunk in enumerate(smaller_chunks):
self._create_chunk(
document,
chunks,
small_chunk,
{0: section_link_text},
is_continuation=(j != 0),
title_prefix=title_prefix,
metadata_suffix_semantic=metadata_suffix_semantic,
metadata_suffix_keyword=metadata_suffix_keyword,
for i, small_chunk in enumerate(smaller_chunks):
chunks.append(
_create_chunk(
text=small_chunk,
links={0: section_link_text},
is_continuation=(i != 0),
)
)
else:
self._create_chunk(
document,
chunks,
split_text,
{0: section_link_text},
is_continuation=(i != 0),
title_prefix=title_prefix,
metadata_suffix_semantic=metadata_suffix_semantic,
metadata_suffix_keyword=metadata_suffix_keyword,
chunks.append(
_create_chunk(
text=split_text,
links={0: section_link_text},
is_continuation=(i != 0),
)
)
continue
# If we can still fit this section into the current chunk, do so
current_token_count = len(self.tokenizer.tokenize(chunk_text))
current_offset = len(shared_precompare_cleanup(chunk_text))
# In the case where the whole section is shorter than a chunk, either add
# it to the current chunk or start a new one
next_section_tokens = (
len(self.tokenizer.tokenize(SECTION_SEPARATOR)) + section_token_count
)
if next_section_tokens + current_token_count <= content_token_limit:
if chunk_text:
chunk_text += SECTION_SEPARATOR
chunk_text += section_text
link_offsets[current_offset] = section_link_text
else:
# finalize the existing chunk
self._create_chunk(
document,
chunks,
chunk_text,
link_offsets,
False,
title_prefix,
metadata_suffix_semantic,
metadata_suffix_keyword,
)
# start a new chunk
chunks.append(_create_chunk(chunk_text, link_offsets))
link_offsets = {0: section_link_text}
chunk_text = section_text
# finalize any leftover text chunk
# Once we hit the end, if we're still in the process of building a chunk, add what we have.
# If there is only whitespace left then don't include it. If there are no chunks at all
# from the doc, we can just create a single chunk with the title.
if chunk_text.strip() or not chunks:
self._create_chunk(
document,
chunks,
chunk_text,
link_offsets or {0: ""}, # safe default
False,
title_prefix,
metadata_suffix_semantic,
metadata_suffix_keyword,
chunks.append(
_create_chunk(
chunk_text,
link_offsets or {0: section_link_text},
)
)
# If the chunk does not have any usable content, it will not be indexed
return chunks
def _handle_single_document(self, document: Document) -> list[DocAwareChunk]:
@@ -389,12 +321,10 @@ class Chunker:
if document.source == DocumentSource.GMAIL:
logger.debug(f"Chunking {document.semantic_identifier}")
# Title prep
title = self._extract_blurb(document.get_title_for_document_index() or "")
title_prefix = title + RETURN_SEPARATOR if title else ""
title_tokens = len(self.tokenizer.tokenize(title_prefix))
# Metadata prep
metadata_suffix_semantic = ""
metadata_suffix_keyword = ""
metadata_tokens = 0
@@ -407,20 +337,19 @@ class Chunker:
)
metadata_tokens = len(self.tokenizer.tokenize(metadata_suffix_semantic))
# If metadata is too large, skip it in the semantic content
if metadata_tokens >= self.chunk_token_limit * MAX_METADATA_PERCENTAGE:
# Note: we can keep the keyword suffix even if the semantic suffix is too long to fit in the model
# context; there is no limit for the keyword component
metadata_suffix_semantic = ""
metadata_tokens = 0
# Adjust content token limit to accommodate title + metadata
content_token_limit = self.chunk_token_limit - title_tokens - metadata_tokens
# If there is not enough context remaining then just index the chunk with no prefix/suffix
if content_token_limit <= CHUNK_MIN_CONTENT:
# Not enough space left, so revert to full chunk without the prefix
content_token_limit = self.chunk_token_limit
title_prefix = ""
metadata_suffix_semantic = ""
# Chunk the document
normal_chunks = self._chunk_document(
document,
title_prefix,
@@ -429,7 +358,6 @@ class Chunker:
content_token_limit,
)
# Optional “multipass” large chunk creation
if self.enable_multipass and self.enable_large_chunks:
large_chunks = generate_large_chunks(normal_chunks)
normal_chunks.extend(large_chunks)
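Worked numbers for the title/metadata budget above (the token counts are assumed for illustration):

chunk_token_limit = 512  # assumed chunk size
CHUNK_MIN_CONTENT = 256
MAX_METADATA_PERCENTAGE = 0.25

title_tokens, metadata_tokens = 12, 48
# 48 < 512 * 0.25 = 128, so the semantic metadata suffix is kept
if metadata_tokens >= chunk_token_limit * MAX_METADATA_PERCENTAGE:
    metadata_tokens = 0
content_token_limit = chunk_token_limit - title_tokens - metadata_tokens  # 452
# 452 > 256, so the prefix/suffix stay; otherwise we would revert to 512
if content_token_limit <= CHUNK_MIN_CONTENT:
    content_token_limit = chunk_token_limit
print(content_token_limit)  # 452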
@@ -443,8 +371,9 @@ class Chunker:
"""
final_chunks: list[DocAwareChunk] = []
for document in documents:
if self.callback and self.callback.should_stop():
raise RuntimeError("Chunker.chunk: Stop signal detected")
if self.callback:
if self.callback.should_stop():
raise RuntimeError("Chunker.chunk: Stop signal detected")
chunks = self._handle_single_document(document)
final_chunks.extend(chunks)
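Stripped of link offsets and metadata, the section-packing logic in _chunk_document reduces to a greedy fill. A sketch under two simplifications: token counts are approximated with a whitespace split, and oversized sections (which the real code splits further) are passed through whole:

SECTION_SEPARATOR = "\n\n"

def pack_sections(sections: list[str], limit: int) -> list[str]:
    chunks: list[str] = []
    current = ""
    for section in sections:
        candidate = current + (SECTION_SEPARATOR if current else "") + section
        if len(candidate.split()) <= limit:
            current = candidate  # the section still fits in the current chunk
        else:
            if current:
                chunks.append(current)  # finalize the full chunk
            current = section  # start a new chunk with this section
    if current.strip() or not chunks:
        chunks.append(current)  # leftover text, or a lone title-only chunk
    return chunks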

View File

@@ -29,7 +29,6 @@ class BaseChunk(BaseModel):
content: str
# Holds the link and the offsets into the raw Chunk text
source_links: dict[int, str] | None
image_file_name: str | None
# True if this Chunk's start is not at the start of a Section
section_continuation: bool

View File

@@ -6,14 +6,12 @@ from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.db.engine import get_session_context_manager
from onyx.db.llm import fetch_default_provider
from onyx.db.llm import fetch_existing_llm_providers
from onyx.db.llm import fetch_provider
from onyx.db.models import Persona
from onyx.llm.chat_llm import DefaultMultiLLM
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.interfaces import LLM
from onyx.llm.override_models import LLMOverride
from onyx.llm.utils import model_supports_image_input
from onyx.utils.headers import build_llm_extra_headers
from onyx.utils.logger import setup_logger
from onyx.utils.long_term_log import LongTermLogger
@@ -88,48 +86,6 @@ def get_llms_for_persona(
return _create_llm(model), _create_llm(fast_model)
def get_default_llm_with_vision(
timeout: int | None = None,
temperature: float | None = None,
additional_headers: dict[str, str] | None = None,
long_term_logger: LongTermLogger | None = None,
) -> LLM | None:
if DISABLE_GENERATIVE_AI:
raise GenAIDisabledException()
with get_session_context_manager() as db_session:
llm_providers = fetch_existing_llm_providers(db_session)
if not llm_providers:
return None
for provider in llm_providers:
model_name = provider.default_model_name
fast_model_name = (
provider.fast_default_model_name or provider.default_model_name
)
if not model_name or not fast_model_name:
continue
if model_supports_image_input(model_name, provider.provider):
return get_llm(
provider=provider.provider,
model=model_name,
deployment_name=provider.deployment_name,
api_key=provider.api_key,
api_base=provider.api_base,
api_version=provider.api_version,
custom_config=provider.custom_config,
timeout=timeout,
temperature=temperature,
additional_headers=additional_headers,
long_term_logger=long_term_logger,
)
raise ValueError("No LLM provider found that supports image input")
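The selection logic of the removed function, reduced to a sketch; the provider class and the capability check below are stand-ins, not the real onyx types:

class Provider:  # stand-in for the real provider model
    def __init__(self, name: str, default_model: str | None) -> None:
        self.name = name
        self.default_model = default_model

def supports_vision(model: str) -> bool:
    # stand-in for model_supports_image_input
    return model in {"gpt-4o", "claude-3-5-sonnet"}

def pick_vision_provider(providers: list[Provider]) -> Provider:
    # Return the first provider whose default model accepts image input
    for provider in providers:
        if provider.default_model and supports_vision(provider.default_model):
            return provider
    raise ValueError("No LLM provider found that supports image input")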
def get_default_llms(
timeout: int | None = None,
temperature: float | None = None,

View File

@@ -1,22 +0,0 @@
# Used for creating embeddings of images for vector search
IMAGE_SUMMARIZATION_SYSTEM_PROMPT = """
You are an assistant for summarizing images for retrieval.
Summarize the content of the following image and be as precise as possible.
The summary will be embedded and used to retrieve the original image.
Therefore, write a concise summary of the image that is optimized for retrieval.
"""
# Prompt for generating image descriptions with filename context
IMAGE_SUMMARIZATION_USER_PROMPT = """
The image has the file name '{title}'.
Describe precisely and concisely what the image shows.
"""
# Used for analyzing images in response to user queries at search time
IMAGE_ANALYSIS_SYSTEM_PROMPT = (
"You are an AI assistant specialized in describing images.\n"
"You will receive a user question plus an image URL. Provide a concise textual answer.\n"
"Focus on aspects of the image that are relevant to the user's question.\n"
"Be specific and detailed about visual elements that directly address the query.\n"
)
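At call time these prompts would typically be paired into a chat request, the system prompt plus a user message carrying the image; a hedged sketch assuming an OpenAI-style message schema (the real call path in the codebase may differ):

def build_summarization_messages(title: str, image_b64: str) -> list[dict]:
    # IMAGE_SUMMARIZATION_* are the constants defined above
    return [
        {"role": "system", "content": IMAGE_SUMMARIZATION_SYSTEM_PROMPT},
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": IMAGE_SUMMARIZATION_USER_PROMPT.format(title=title),
                },
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{image_b64}"},
                },
            ],
        },
    ]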

View File

@@ -55,11 +55,7 @@ def _create_indexable_chunks(
# The section is not really used past this point since we have already done the other processing
# for the chunking and embedding.
sections=[
Section(
text=preprocessed_doc["content"],
link=preprocessed_doc["url"],
image_file_name=None,
)
Section(text=preprocessed_doc["content"], link=preprocessed_doc["url"])
],
source=DocumentSource.WEB,
semantic_identifier=preprocessed_doc["title"],
@@ -97,7 +93,6 @@ def _create_indexable_chunks(
document_sets=set(),
boost=DEFAULT_BOOST,
large_chunk_id=None,
image_file_name=None,
)
chunks.append(chunk)

View File

@@ -53,11 +53,6 @@ class Settings(BaseModel):
auto_scroll: bool | None = False
query_history_type: QueryHistoryType | None = None
# Image processing settings
image_extraction_and_analysis_enabled: bool | None = False
search_time_image_analysis_enabled: bool | None = False
image_analysis_max_size_mb: int | None = 20
class UserSettings(Settings):
notifications: list[Notification]

View File

@@ -47,7 +47,6 @@ def load_settings() -> Settings:
settings.anonymous_user_enabled = anonymous_user_enabled
settings.query_history_type = ONYX_QUERY_HISTORY_TYPE
return settings

View File

@@ -1,23 +0,0 @@
"""
Standardized error handling utilities.
"""
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.utils.logger import setup_logger
logger = setup_logger()
def handle_connector_error(e: Exception, context: str) -> None:
"""
Standard error handling for connectors.
Args:
e: The exception that was raised
context: A description of where the error occurred
Raises:
The original exception if CONTINUE_ON_CONNECTOR_FAILURE is False
"""
logger.error(f"Error in {context}: {e}", exc_info=e)
if not CONTINUE_ON_CONNECTOR_FAILURE:
raise
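Typical usage of this helper wraps per-item work in a connector loop so one bad document does not abort the whole run; the loop body below is hypothetical:

def process_documents(docs: list[dict]) -> None:
    for doc in docs:
        try:
            index_document(doc)  # hypothetical per-document work
        except Exception as e:
            # Logs the failure; re-raises only when
            # CONTINUE_ON_CONNECTOR_FAILURE is False
            handle_connector_error(e, f"indexing {doc.get('id')}")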

View File

@@ -1,10 +1,9 @@
aioboto3==14.0.0
aiohttp==3.10.2
alembic==1.10.4
asyncpg==0.27.0
atlassian-python-api==3.41.16
beautifulsoup4==4.12.3
boto3==1.36.23
boto3==1.34.84
celery==5.5.0b4
chardet==5.2.0
dask==2023.8.1

View File

@@ -13,5 +13,4 @@ transformers==4.39.2
uvicorn==0.21.1
voyageai==0.2.3
litellm==1.61.16
sentry-sdk[fastapi,celery,starlette]==2.14.0
aioboto3==13.4.0
sentry-sdk[fastapi,celery,starlette]==2.14.0

View File

@@ -1,45 +0,0 @@
import argparse
import logging
from logging import getLogger
from onyx.db.seeding.chat_history_seeding import seed_chat_history
# Configure the logger
logging.basicConfig(
level=logging.INFO, # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", # Log format
handlers=[logging.StreamHandler()], # Output logs to console
)
logger = getLogger(__name__)
def go_main(num_sessions: int, num_messages: int, num_days: int) -> None:
seed_chat_history(num_sessions, num_messages, num_days)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Seed chat history")
parser.add_argument(
"--sessions",
type=int,
default=2048,
help="Number of chat sessions to seed",
)
parser.add_argument(
"--messages",
type=int,
default=4,
help="Number of chat messages to seed per session",
)
parser.add_argument(
"--days",
type=int,
default=90,
help="Number of days looking backwards over which to seed the timestamps with",
)
args = parser.parse_args()
go_main(args.sessions, args.messages, args.days)

View File

@@ -207,7 +207,7 @@ def query_vespa(
yql: str, tenant_id: Optional[str] = None, limit: int = 10
) -> List[Dict[str, Any]]:
# Perform a Vespa query using YQL syntax.
filters = IndexFilters(tenant_id=None, access_control_list=[])
filters = IndexFilters(tenant_id=tenant_id, access_control_list=[])
filter_string = build_vespa_filters(filters, remove_trailing_and=True)
full_yql = yql.strip()
if filter_string:
@@ -472,7 +472,9 @@ def get_document_acls(
print("-" * 80)
def get_current_chunk_count(document_id: str) -> int | None:
def get_current_chunk_count(
document_id: str, index_name: str, tenant_id: str
) -> int | None:
with get_session_with_current_tenant() as session:
return (
session.query(Document.chunk_count)
@@ -484,7 +486,7 @@ def get_current_chunk_count(document_id: str) -> int | None:
def get_number_of_chunks_we_think_exist(
document_id: str, index_name: str, tenant_id: str
) -> int:
current_chunk_count = get_current_chunk_count(document_id)
current_chunk_count = get_current_chunk_count(document_id, index_name, tenant_id)
print(f"Current chunk count: {current_chunk_count}")
doc_info = VespaIndex.enrich_basic_chunk_info(
@@ -634,7 +636,6 @@ def delete_where(
Removes visited documents in `cluster` where the given selection
is true, using Vespa's 'delete where' endpoint.
:param index_name: Typically <namespace>/<document-type> from your schema
:param selection: The selection string, e.g., "true" or "foo contains 'bar'"
:param cluster: The name of the cluster where documents reside
@@ -798,7 +799,7 @@ def main() -> None:
args = parser.parse_args()
vespa_debug = VespaDebugging(args.tenant_id)
CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id or "public")
CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id)
if args.action == "delete-all-documents":
if not args.tenant_id:
parser.error("--tenant-id is required for delete-all-documents action")
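The tenant fix above matters because the filter string is appended directly to the YQL before the query runs; a sketch of that composition (the filter syntax is illustrative, not exact Vespa YQL):

def build_query(yql: str, filter_string: str, limit: int) -> str:
    full_yql = yql.strip()
    if filter_string:
        full_yql = f"{full_yql} and {filter_string}"  # assumed composition
    return f"{full_yql} limit {limit}"

print(build_query(
    "select * from sources chat_doc where true",
    'tenant_id contains "acme"',
    10,
))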

View File

@@ -71,7 +71,6 @@ def generate_dummy_chunk(
title_embedding=generate_random_embedding(embedding_dim),
large_chunk_id=None,
large_chunk_reference_ids=[],
image_file_name=None,
)
document_set_names = []

View File

@@ -68,12 +68,6 @@ LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
# allow us to specify a custom timeout
API_BASED_EMBEDDING_TIMEOUT = int(os.environ.get("API_BASED_EMBEDDING_TIMEOUT", "600"))
# Local batch size for VertexAI embedding models currently calibrated for item size of 512 tokens
# NOTE: increasing this value may lead to API errors due to token limit exhaustion per call.
VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE = int(
os.environ.get("VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE", "25")
)
# Only used for OpenAI
OPENAI_EMBEDDING_TIMEOUT = int(
os.environ.get("OPENAI_EMBEDDING_TIMEOUT", API_BASED_EMBEDDING_TIMEOUT)
@@ -206,12 +200,12 @@ SUPPORTED_EMBEDDING_MODELS = [
index_name="danswer_chunk_text_embedding_3_small",
),
SupportedEmbeddingModel(
name="google/text-embedding-005",
name="google/text-embedding-004",
dim=768,
index_name="danswer_chunk_google_text_embedding_004",
),
SupportedEmbeddingModel(
name="google/text-embedding-005",
name="google/text-embedding-004",
dim=768,
index_name="danswer_chunk_text_embedding_004",
),

View File

@@ -13,7 +13,6 @@ class EmbeddingProvider(str, Enum):
class RerankerProvider(str, Enum):
COHERE = "cohere"
LITELLM = "litellm"
BEDROCK = "bedrock"
class EmbedTextType(str, Enum):

View File

@@ -1,46 +0,0 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from ee.onyx.db.usage_export import get_all_empty_chat_message_entries
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.seeding.chat_history_seeding import seed_chat_history
def test_usage_reports(reset: None) -> None:
EXPECTED_SESSIONS = 2048
MESSAGES_PER_SESSION = 4
EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION
seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)
with get_session_with_current_tenant() as db_session:
# count of all entries should be exact
period = (
datetime.fromtimestamp(0, tz=timezone.utc),
datetime.now(tz=timezone.utc),
)
count = 0
for entry_batch in get_all_empty_chat_message_entries(db_session, period):
for entry in entry_batch:
count += 1
assert count == EXPECTED_MESSAGES
# the count in a one-month time range should statistically fall within a predictable range
# this can be tightened if we seed the chat history data deterministically
period = (
datetime.now(tz=timezone.utc) - timedelta(days=30),
datetime.now(tz=timezone.utc),
)
count = 0
for entry_batch in get_all_empty_chat_message_entries(db_session, period):
for entry in entry_batch:
count += 1
lower = EXPECTED_MESSAGES // 3 - (EXPECTED_MESSAGES // (3 * 3))
upper = EXPECTED_MESSAGES // 3 + (EXPECTED_MESSAGES // (3 * 3))
assert count > lower
assert count < upper
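With the defaults above, the statistical bounds work out as follows; seeding is roughly uniform over 90 days, so about a third of all messages should land in any 30-day window:

EXPECTED_MESSAGES = 2048 * 4            # 8192
expected = EXPECTED_MESSAGES // 3       # 2730 in a 30-day window
slack = EXPECTED_MESSAGES // (3 * 3)    # 910
lower, upper = expected - slack, expected + slack
print(lower, upper)                     # 1820 3640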

View File

@@ -31,7 +31,6 @@ def create_test_chunk(
metadata={},
match_highlights=[],
updated_at=datetime.now(),
image_file_name=None,
)

View File

@@ -80,7 +80,6 @@ def mock_inference_sections() -> list[InferenceSection]:
updated_at=datetime(2023, 1, 1),
source_links={0: "https://example.com/doc1"},
match_highlights=[],
image_file_name=None,
),
chunks=MagicMock(),
),
@@ -103,7 +102,6 @@ def mock_inference_sections() -> list[InferenceSection]:
updated_at=datetime(2023, 1, 2),
source_links={0: "https://example.com/doc2"},
match_highlights=[],
image_file_name=None,
),
chunks=MagicMock(),
),

View File

@@ -150,7 +150,6 @@ def test_fuzzy_match_quotes_to_docs() -> None:
metadata={},
match_highlights=[],
updated_at=None,
image_file_name=None,
)
test_chunk_1 = InferenceChunk(
document_id="test doc 1",
@@ -169,7 +168,6 @@ def test_fuzzy_match_quotes_to_docs() -> None:
metadata={},
match_highlights=[],
updated_at=None,
image_file_name=None,
)
test_quotes = [

View File

@@ -37,7 +37,6 @@ def create_inference_chunk(
metadata={},
match_highlights=[],
updated_at=None,
image_file_name=None,
)

View File

@@ -62,7 +62,6 @@ def test_default_indexing_embedder_embed_chunks(mock_embedding_model: Mock) -> N
mini_chunk_texts=None,
large_chunk_reference_ids=[],
large_chunk_id=None,
image_file_name=None,
)
]

View File

@@ -118,7 +118,6 @@ services:
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
# Seeding configuration
- USE_IAM_AUTH=${USE_IAM_AUTH:-}
- ONYX_QUERY_HISTORY_TYPE=${ONYX_QUERY_HISTORY_TYPE:-}
# Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
# volumes:
# - ./bundle.pem:/app/bundle.pem:ro

View File

@@ -95,7 +95,6 @@ services:
# Enterprise Edition only
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- ONYX_QUERY_HISTORY_TYPE=${ONYX_QUERY_HISTORY_TYPE:-}
# Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
# volumes:
# - ./bundle.pem:/app/bundle.pem:ro

web/package-lock.json (generated, 433 changed lines)
View File

@@ -44,7 +44,6 @@
"autoprefixer": "^10.4.14",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"date-fns": "^3.6.0",
"favicon-fetch": "^1.0.0",
"formik": "^2.2.9",
@@ -9314,438 +9313,6 @@
"node": ">=6"
}
},
"node_modules/cmdk": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/cmdk/-/cmdk-1.0.0.tgz",
"integrity": "sha512-gDzVf0a09TvoJ5jnuPvygTB77+XdOSwEmJ88L6XPFPlv7T3RxbP9jgenfylrAMD0+Le1aO0nVjQUzl2g+vjz5Q==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-dialog": "1.0.5",
"@radix-ui/react-primitive": "1.0.3"
},
"peerDependencies": {
"react": "^18.0.0",
"react-dom": "^18.0.0"
}
},
"node_modules/cmdk/node_modules/@radix-ui/primitive": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.0.1.tgz",
"integrity": "sha512-yQ8oGX2GVsEYMWGxcovu1uGWPCxV5BFfeeYxqPmuAzUyLT9qmaMXSAhXpb0WrspIeqYzdJpkh2vHModJPgRIaw==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-compose-refs": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.0.1.tgz",
"integrity": "sha512-fDSBgd44FKHa1FRMU59qBMPFcl2PZE+2nmqunj+BWFyYYjnhIDWL2ItDs3rrbJDQOtzt5nIebLCQc4QRfz6LJw==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-context": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.0.1.tgz",
"integrity": "sha512-ebbrdFoYTcuZ0v4wG5tedGnp9tzcV8awzsxYph7gXUyvnNLuTIcCk1q17JEbnVhXAKG9oX3KtchwiMIAYp9NLg==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-dialog": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.0.5.tgz",
"integrity": "sha512-GjWJX/AUpB703eEBanuBnIWdIXg6NvJFCXcNlSZk4xdszCdhrJgBoUd1cGk67vFO+WdA2pfI/plOpqz/5GUP6Q==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/primitive": "1.0.1",
"@radix-ui/react-compose-refs": "1.0.1",
"@radix-ui/react-context": "1.0.1",
"@radix-ui/react-dismissable-layer": "1.0.5",
"@radix-ui/react-focus-guards": "1.0.1",
"@radix-ui/react-focus-scope": "1.0.4",
"@radix-ui/react-id": "1.0.1",
"@radix-ui/react-portal": "1.0.4",
"@radix-ui/react-presence": "1.0.1",
"@radix-ui/react-primitive": "1.0.3",
"@radix-ui/react-slot": "1.0.2",
"@radix-ui/react-use-controllable-state": "1.0.1",
"aria-hidden": "^1.1.1",
"react-remove-scroll": "2.5.5"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0",
"react-dom": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-dismissable-layer": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.0.5.tgz",
"integrity": "sha512-aJeDjQhywg9LBu2t/At58hCvr7pEm0o2Ke1x33B+MhjNmmZ17sy4KImo0KPLgsnc/zN7GPdce8Cnn0SWvwZO7g==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/primitive": "1.0.1",
"@radix-ui/react-compose-refs": "1.0.1",
"@radix-ui/react-primitive": "1.0.3",
"@radix-ui/react-use-callback-ref": "1.0.1",
"@radix-ui/react-use-escape-keydown": "1.0.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0",
"react-dom": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-dismissable-layer/node_modules/@radix-ui/react-use-callback-ref": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz",
"integrity": "sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-dismissable-layer/node_modules/@radix-ui/react-use-escape-keydown": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.0.3.tgz",
"integrity": "sha512-vyL82j40hcFicA+M4Ex7hVkB9vHgSse1ZWomAqV2Je3RleKGO5iM8KMOEtfoSB0PnIelMd2lATjTGMYqN5ylTg==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-use-callback-ref": "1.0.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-focus-guards": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.0.1.tgz",
"integrity": "sha512-Rect2dWbQ8waGzhMavsIbmSVCgYxkXLxxR3ZvCX79JOglzdEy4JXMb98lq4hPxUbLr77nP0UOGf4rcMU+s1pUA==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-focus-scope": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.0.4.tgz",
"integrity": "sha512-sL04Mgvf+FmyvZeYfNu1EPAaaxD+aw7cYeIB9L9Fvq8+urhltTRaEo5ysKOpHuKPclsZcSUMKlN05x4u+CINpA==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-compose-refs": "1.0.1",
"@radix-ui/react-primitive": "1.0.3",
"@radix-ui/react-use-callback-ref": "1.0.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0",
"react-dom": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-focus-scope/node_modules/@radix-ui/react-use-callback-ref": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz",
"integrity": "sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-id": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.0.1.tgz",
"integrity": "sha512-tI7sT/kqYp8p96yGWY1OAnLHrqDgzHefRBKQ2YAkBS5ja7QLcZ9Z/uY7bEjPUatf8RomoXM8/1sMj1IJaE5UzQ==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-use-layout-effect": "1.0.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-id/node_modules/@radix-ui/react-use-layout-effect": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.1.tgz",
"integrity": "sha512-v/5RegiJWYdoCvMnITBkNNx6bCj20fiaJnWtRkU18yITptraXjffz5Qbn05uOiQnOvi+dbkznkoaMltz1GnszQ==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-portal": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.0.4.tgz",
"integrity": "sha512-Qki+C/EuGUVCQTOTD5vzJzJuMUlewbzuKyUy+/iHM2uwGiru9gZeBJtHAPKAEkB5KWGi9mP/CHKcY0wt1aW45Q==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-primitive": "1.0.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0",
"react-dom": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-presence": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.0.1.tgz",
"integrity": "sha512-UXLW4UAbIY5ZjcvzjfRFo5gxva8QirC9hF7wRE4U5gz+TP0DbRk+//qyuAQ1McDxBt1xNMBTaciFGvEmJvAZCg==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-compose-refs": "1.0.1",
"@radix-ui/react-use-layout-effect": "1.0.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0",
"react-dom": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-presence/node_modules/@radix-ui/react-use-layout-effect": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.1.tgz",
"integrity": "sha512-v/5RegiJWYdoCvMnITBkNNx6bCj20fiaJnWtRkU18yITptraXjffz5Qbn05uOiQnOvi+dbkznkoaMltz1GnszQ==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-primitive": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-1.0.3.tgz",
"integrity": "sha512-yi58uVyoAcK/Nq1inRY56ZSjKypBNKTa/1mcL8qdl6oJeEaDbOldlzrGn7P6Q3Id5d+SYNGc5AJgc4vGhjs5+g==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-slot": "1.0.2"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0",
"react-dom": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-slot": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.0.2.tgz",
"integrity": "sha512-YeTpuq4deV+6DusvVUW4ivBgnkHwECUu0BiN43L5UCDFgdhsRUWAghhTF5MbvNTPzmiFOx90asDSUjWuCNapwg==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-compose-refs": "1.0.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-use-controllable-state": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.0.1.tgz",
"integrity": "sha512-Svl5GY5FQeN758fWKrjM6Qb7asvXeiZltlT4U2gVfl8Gx5UAv2sMR0LWo8yhsIZh2oQ0eFdZ59aoOOMV7b47VA==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10",
"@radix-ui/react-use-callback-ref": "1.0.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/@radix-ui/react-use-controllable-state/node_modules/@radix-ui/react-use-callback-ref": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz",
"integrity": "sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.13.10"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/cmdk/node_modules/react-remove-scroll": {
"version": "2.5.5",
"resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.5.5.tgz",
"integrity": "sha512-ImKhrzJJsyXJfBZ4bzu8Bwpka14c/fQt0k+cyFp/PBhTfyDnU5hjOtM4AG/0AMyy8oKzOTR0lDgJIM7pYXI0kw==",
"license": "MIT",
"dependencies": {
"react-remove-scroll-bar": "^2.3.3",
"react-style-singleton": "^2.2.1",
"tslib": "^2.1.0",
"use-callback-ref": "^1.3.0",
"use-sidecar": "^1.1.2"
},
"engines": {
"node": ">=10"
},
"peerDependencies": {
"@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0",
"react": "^16.8.0 || ^17.0.0 || ^18.0.0"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/co": {
"version": "4.6.0",
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",

View File

@@ -47,7 +47,6 @@
"autoprefixer": "^10.4.14",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"date-fns": "^3.6.0",
"favicon-fetch": "^1.0.0",
"formik": "^2.2.9",

File diff suppressed because one or more lines are too long

Before: 2.9 KiB | After: 7.0 KiB

View File

@@ -1 +1,8 @@
<svg xmlns="http://www.w3.org/2000/svg" shape-rendering="geometricPrecision" text-rendering="geometricPrecision" image-rendering="optimizeQuality" fill-rule="evenodd" clip-rule="evenodd" viewBox="0 0 512 509.64"><path fill="#D77655" d="M115.612 0h280.775C459.974 0 512 52.026 512 115.612v278.415c0 63.587-52.026 115.612-115.613 115.612H115.612C52.026 509.639 0 457.614 0 394.027V115.612C0 52.026 52.026 0 115.612 0z"/><path fill="#FCF2EE" fill-rule="nonzero" d="M142.27 316.619l73.655-41.326 1.238-3.589-1.238-1.996-3.589-.001-12.31-.759-42.084-1.138-36.498-1.516-35.361-1.896-8.897-1.895-8.34-10.995.859-5.484 7.482-5.03 10.717.935 23.683 1.617 35.537 2.452 25.782 1.517 38.193 3.968h6.064l.86-2.451-2.073-1.517-1.618-1.517-36.776-24.922-39.81-26.338-20.852-15.166-11.273-7.683-5.687-7.204-2.451-15.721 10.237-11.273 13.75.935 3.513.936 13.928 10.716 29.749 23.027 38.848 28.612 5.687 4.727 2.275-1.617.278-1.138-2.553-4.271-21.13-38.193-22.546-38.848-10.035-16.101-2.654-9.655c-.935-3.968-1.617-7.304-1.617-11.374l11.652-15.823 6.445-2.073 15.545 2.073 6.547 5.687 9.655 22.092 15.646 34.78 24.265 47.291 7.103 14.028 3.791 12.992 1.416 3.968 2.449-.001v-2.275l1.997-26.641 3.69-32.707 3.589-42.084 1.239-11.854 5.863-14.206 11.652-7.683 9.099 4.348 7.482 10.716-1.036 6.926-4.449 28.915-8.72 45.294-5.687 30.331h3.313l3.792-3.791 15.342-20.372 25.782-32.227 11.374-12.789 13.27-14.129 8.517-6.724 16.1-.001 11.854 17.617-5.307 18.199-16.581 21.029-13.75 17.819-19.716 26.54-12.309 21.231 1.138 1.694 2.932-.278 44.536-9.479 24.062-4.347 28.714-4.928 12.992 6.066 1.416 6.167-5.106 12.613-30.71 7.583-36.018 7.204-53.636 12.689-.657.48.758.935 24.164 2.275 10.337.556h25.301l47.114 3.514 12.309 8.139 7.381 9.959-1.238 7.583-18.957 9.655-25.579-6.066-59.702-14.205-20.474-5.106-2.83-.001v1.694l17.061 16.682 31.266 28.233 39.152 36.397 1.997 8.999-5.03 7.102-5.307-.758-34.401-25.883-13.27-11.651-30.053-25.302-1.996-.001v2.654l6.926 10.136 36.574 54.975 1.895 16.859-2.653 5.485-9.479 3.311-10.414-1.895-21.408-30.054-22.092-33.844-17.819-30.331-2.173 1.238-10.515 113.261-4.929 5.788-11.374 4.348-9.478-7.204-5.03-11.652 5.03-23.027 6.066-30.052 4.928-23.886 4.449-29.674 2.654-9.858-.177-.657-2.173.278-22.37 30.71-34.021 45.977-26.919 28.815-6.445 2.553-11.173-5.789 1.037-10.337 6.243-9.2 37.257-47.392 22.47-29.371 14.508-16.961-.101-2.451h-.859l-98.954 64.251-17.618 2.275-7.583-7.103.936-11.652 3.589-3.791 29.749-20.474-.101.102.024.101z"/></svg>
<?xml version="1.0" encoding="UTF-8"?>
<svg width="256px" height="176px" viewBox="0 0 256 176" version="1.1" xmlns="http://www.w3.org/2000/svg" preserveAspectRatio="xMidYMid">
<title>Anthropic</title>
<g fill="#181818">
<path d="M147.486878,0 C147.486878,0 217.568251,175.780074 217.568251,175.780074 C217.568251,175.780074 256,175.780074 256,175.780074 C256,175.780074 185.918621,0 185.918621,0 C185.918621,0 147.486878,0 147.486878,0 C147.486878,0 147.486878,0 147.486878,0 Z"></path>
<path d="M66.1828124,106.221191 C66.1828124,106.221191 90.1624677,44.4471185 90.1624677,44.4471185 C90.1624677,44.4471185 114.142128,106.221191 114.142128,106.221191 C114.142128,106.221191 66.1828124,106.221191 66.1828124,106.221191 C66.1828124,106.221191 66.1828124,106.221191 66.1828124,106.221191 Z M70.0705318,0 C70.0705318,0 0,175.780074 0,175.780074 C0,175.780074 39.179211,175.780074 39.179211,175.780074 C39.179211,175.780074 53.5097704,138.86606 53.5097704,138.86606 C53.5097704,138.86606 126.817544,138.86606 126.817544,138.86606 C126.817544,138.86606 141.145724,175.780074 141.145724,175.780074 C141.145724,175.780074 180.324935,175.780074 180.324935,175.780074 C180.324935,175.780074 110.254409,0 110.254409,0 C110.254409,0 70.0705318,0 70.0705318,0 C70.0705318,0 70.0705318,0 70.0705318,0 Z"></path>
</g>
</svg>

Before: 2.4 KiB | After: 1.3 KiB

View File

@@ -1 +0,0 @@
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>DeepSeek</title><path d="M23.748 4.482c-.254-.124-.364.113-.512.234-.051.039-.094.09-.137.136-.372.397-.806.657-1.373.626-.829-.046-1.537.214-2.163.848-.133-.782-.575-1.248-1.247-1.548-.352-.156-.708-.311-.955-.65-.172-.241-.219-.51-.305-.774-.055-.16-.11-.323-.293-.35-.2-.031-.278.136-.356.276-.313.572-.434 1.202-.422 1.84.027 1.436.633 2.58 1.838 3.393.137.093.172.187.129.323-.082.28-.18.552-.266.833-.055.179-.137.217-.329.14a5.526 5.526 0 01-1.736-1.18c-.857-.828-1.631-1.742-2.597-2.458a11.365 11.365 0 00-.689-.471c-.985-.957.13-1.743.388-1.836.27-.098.093-.432-.779-.428-.872.004-1.67.295-2.687.684a3.055 3.055 0 01-.465.137 9.597 9.597 0 00-2.883-.102c-1.885.21-3.39 1.102-4.497 2.623C.082 8.606-.231 10.684.152 12.85c.403 2.284 1.569 4.175 3.36 5.653 1.858 1.533 3.997 2.284 6.438 2.14 1.482-.085 3.133-.284 4.994-1.86.47.234.962.327 1.78.397.63.059 1.236-.03 1.705-.128.735-.156.684-.837.419-.961-2.155-1.004-1.682-.595-2.113-.926 1.096-1.296 2.746-2.642 3.392-7.003.05-.347.007-.565 0-.845-.004-.17.035-.237.23-.256a4.173 4.173 0 001.545-.475c1.396-.763 1.96-2.015 2.093-3.517.02-.23-.004-.467-.247-.588zM11.581 18c-2.089-1.642-3.102-2.183-3.52-2.16-.392.024-.321.471-.235.763.09.288.207.486.371.739.114.167.192.416-.113.603-.673.416-1.842-.14-1.897-.167-1.361-.802-2.5-1.86-3.301-3.307-.774-1.393-1.224-2.887-1.298-4.482-.02-.386.093-.522.477-.592a4.696 4.696 0 011.529-.039c2.132.312 3.946 1.265 5.468 2.774.868.86 1.525 1.887 2.202 2.891.72 1.066 1.494 2.082 2.48 2.914.348.292.625.514.891.677-.802.09-2.14.11-3.054-.614zm1-6.44a.306.306 0 01.415-.287.302.302 0 01.2.288.306.306 0 01-.31.307.303.303 0 01-.304-.308zm3.11 1.596c-.2.081-.399.151-.59.16a1.245 1.245 0 01-.798-.254c-.274-.23-.47-.358-.552-.758a1.73 1.73 0 01.016-.588c.07-.327-.008-.537-.239-.727-.187-.156-.426-.199-.688-.199a.559.559 0 01-.254-.078c-.11-.054-.2-.19-.114-.358.028-.054.16-.186.192-.21.356-.202.767-.136 1.146.016.352.144.618.408 1.001.782.391.451.462.576.685.914.176.265.336.537.445.848.067.195-.019.354-.25.452z" fill="#4D6BFE"></path></svg>

Before: 2.1 KiB (file removed)

View File

@@ -7,12 +7,14 @@ import {
MicrosoftIconSVG,
MistralIcon,
MetaIcon,
OpenAIIcon,
GeminiIcon,
OpenSourceIcon,
AnthropicSVG,
IconProps,
OpenAIISVG,
DeepseekIcon,
} from "@/components/icons/icons";
import { FaRobot } from "react-icons/fa";
export interface CustomConfigKey {
name: string;
@@ -74,31 +76,30 @@ export interface LLMProviderDescriptor {
}
export const getProviderIcon = (providerName: string, modelName?: string) => {
const modelIconMap: Record<
string,
({ size, className }: IconProps) => JSX.Element
> = {
amazon: AmazonIcon,
phi: MicrosoftIconSVG,
mistral: MistralIcon,
ministral: MistralIcon,
llama: MetaIcon,
gemini: GeminiIcon,
deepseek: DeepseekIcon,
claude: AnthropicIcon,
};
const modelNameToIcon = (
modelName: string,
fallbackIcon: ({ size, className }: IconProps) => JSX.Element
): (({ size, className }: IconProps) => JSX.Element) => {
const lowerModelName = modelName?.toLowerCase();
for (const [key, icon] of Object.entries(modelIconMap)) {
if (lowerModelName?.includes(key)) {
return icon;
}
if (modelName?.toLowerCase().includes("amazon")) {
return AmazonIcon;
}
if (modelName?.toLowerCase().includes("phi")) {
return MicrosoftIconSVG;
}
if (modelName?.toLowerCase().includes("mistral")) {
return MistralIcon;
}
if (modelName?.toLowerCase().includes("llama")) {
return MetaIcon;
}
if (modelName?.toLowerCase().includes("gemini")) {
return GeminiIcon;
}
if (modelName?.toLowerCase().includes("claude")) {
return AnthropicIcon;
} else {
return fallbackIcon;
}
return fallbackIcon;
};
switch (providerName) {

View File

@@ -1,6 +1,6 @@
"use client";
import { Form, Formik } from "formik";
import { ArrayHelpers, FieldArray, Form, Formik } from "formik";
import * as Yup from "yup";
import { PopupSpec } from "@/components/admin/connectors/Popup";
import {
@@ -10,14 +10,13 @@ import {
} from "./lib";
import { ConnectorStatus, DocumentSet, UserGroup, UserRole } from "@/lib/types";
import { TextFormField } from "@/components/admin/connectors/Field";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
import { Separator } from "@/components/ui/separator";
import { Button } from "@/components/ui/button";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { IsPublicGroupSelector } from "@/components/IsPublicGroupSelector";
import React, { useEffect, useState } from "react";
import { useUser } from "@/components/user/UserProvider";
import { ConnectorMultiSelect } from "@/components/ConnectorMultiSelect";
import { NonSelectableConnectors } from "@/components/NonSelectableConnectors";
interface SetCreationPopupProps {
ccPairs: ConnectorStatus<any, any>[];
@@ -46,7 +45,7 @@ export const DocumentSetCreationForm = ({
}, [existingDocumentSet?.is_public]);
return (
<div className="max-w-full mx-auto">
<div>
<Formik<DocumentSetCreationRequest>
initialValues={{
name: existingDocumentSet?.name ?? "",
@@ -105,122 +104,243 @@ export const DocumentSetCreationForm = ({
}}
>
{(props) => {
// Filter visible cc pairs for curator role
const visibleCcPairs =
user?.role === UserRole.CURATOR
? localCcPairs.filter(
(ccPair) =>
ccPair.access_type === "public" ||
(ccPair.groups.length > 0 &&
props.values.groups.every((group) =>
ccPair.groups.includes(group)
))
)
: localCcPairs;
// Filter non-visible cc pairs for curator role
const nonVisibleCcPairs =
user?.role === UserRole.CURATOR
? localCcPairs.filter(
(ccPair) =>
!(ccPair.access_type === "public") &&
(ccPair.groups.length === 0 ||
!props.values.groups.every((group) =>
ccPair.groups.includes(group)
))
)
: [];
// Deselect filtered out cc pairs
if (user?.role === UserRole.CURATOR) {
const visibleCcPairIds = visibleCcPairs.map(
(ccPair) => ccPair.cc_pair_id
);
props.values.cc_pair_ids = props.values.cc_pair_ids.filter((id) =>
visibleCcPairIds.includes(id)
);
}
return (
<Form className="space-y-6 w-full ">
<div className="space-y-4 w-full">
<TextFormField
name="name"
label="Name:"
placeholder="A name for the document set"
disabled={isUpdate}
autoCompleteDisabled={true}
/>
<TextFormField
name="description"
label="Description:"
placeholder="Describe what the document set represents"
autoCompleteDisabled={true}
optional={true}
<Form>
<TextFormField
name="name"
label="Name:"
placeholder="A name for the document set"
disabled={isUpdate}
autoCompleteDisabled={true}
/>
<TextFormField
name="description"
label="Description:"
placeholder="Describe what the document set represents"
autoCompleteDisabled={true}
optional={true}
/>
{isPaidEnterpriseFeaturesEnabled && (
<IsPublicGroupSelector
formikProps={props}
objectName="document set"
/>
)}
{isPaidEnterpriseFeaturesEnabled && (
<IsPublicGroupSelector
formikProps={props}
objectName="document set"
/>
)}
</div>
<Separator />
<Separator className="my-6" />
{user?.role === UserRole.CURATOR ? (
<>
<div className="flex flex-col gap-y-1">
<h2 className="mb-1 font-medium text-base">
These are the connectors available to{" "}
{userGroups && userGroups.length > 1
? "the selected group"
: "the group you curate"}
:
</h2>
<div className="space-y-6">
{user?.role === UserRole.CURATOR ? (
<>
<ConnectorMultiSelect
<p className="mb-text-sm">
All documents indexed by these selected connectors will be
a part of this document set.
</p>
<FieldArray
name="cc_pair_ids"
label={`Connectors available to ${
userGroups && userGroups.length > 1
? "the selected group"
: "the group you curate"
}`}
connectors={visibleCcPairs}
selectedIds={props.values.cc_pair_ids}
onChange={(selectedIds) => {
props.setFieldValue("cc_pair_ids", selectedIds);
render={(arrayHelpers: ArrayHelpers) => {
// Filter visible cc pairs
const visibleCcPairs = localCcPairs.filter(
(ccPair) =>
ccPair.access_type === "public" ||
(ccPair.groups.length > 0 &&
props.values.groups.every((group) =>
ccPair.groups.includes(group)
))
);
// Deselect filtered out cc pairs
const visibleCcPairIds = visibleCcPairs.map(
(ccPair) => ccPair.cc_pair_id
);
props.values.cc_pair_ids =
props.values.cc_pair_ids.filter((id) =>
visibleCcPairIds.includes(id)
);
return (
<div className="mb-3 flex gap-2 flex-wrap">
{visibleCcPairs.map((ccPair) => {
const ind = props.values.cc_pair_ids.indexOf(
ccPair.cc_pair_id
);
const isSelected = ind !== -1;
return (
<div
key={`${ccPair.connector.id}-${ccPair.credential.id}`}
className={
`
px-3
py-1
rounded-lg
border
border-border
w-fit
flex
cursor-pointer ` +
(isSelected
? " bg-background-200"
: " hover:bg-accent-background-hovered")
}
onClick={() => {
if (isSelected) {
arrayHelpers.remove(ind);
} else {
arrayHelpers.push(ccPair.cc_pair_id);
}
}}
>
<div className="my-auto">
<ConnectorTitle
connector={ccPair.connector}
ccPairId={ccPair.cc_pair_id}
ccPairName={ccPair.name}
isLink={false}
showMetadata={false}
/>
</div>
</div>
);
})}
</div>
);
}}
placeholder="Search for connectors..."
/>
</div>
<NonSelectableConnectors
connectors={nonVisibleCcPairs}
title={`Connectors not available to the ${
userGroups && userGroups.length > 1
? `group${
props.values.groups.length > 1 ? "s" : ""
} you have selected`
: "group you curate"
}`}
description="Only connectors that are directly assigned to the group you are trying to add the document set to will be available."
<div>
<FieldArray
name="cc_pair_ids"
render={() => {
// Filter non-visible cc pairs
const nonVisibleCcPairs = localCcPairs.filter(
(ccPair) =>
!(ccPair.access_type === "public") &&
(ccPair.groups.length === 0 ||
!props.values.groups.every((group) =>
ccPair.groups.includes(group)
))
);
return nonVisibleCcPairs.length > 0 ? (
<>
<Separator />
<h2 className="mb-1 font-medium text-base">
These connectors are not available to the{" "}
{userGroups && userGroups.length > 1
? `group${
props.values.groups.length > 1 ? "s" : ""
} you have selected`
: "group you curate"}
:
</h2>
<p className="mb-3 text-sm">
Only connectors that are directly assigned to the
group you are trying to add the document set to
will be available.
</p>
<div className="mb-3 flex gap-2 flex-wrap">
{nonVisibleCcPairs.map((ccPair) => (
<div
key={`${ccPair.connector.id}-${ccPair.credential.id}`}
className="px-3 py-1 rounded-lg border border-non-selectable-border w-fit flex cursor-not-allowed"
>
<div className="my-auto">
<ConnectorTitle
connector={ccPair.connector}
ccPairId={ccPair.cc_pair_id}
ccPairName={ccPair.name}
isLink={false}
showMetadata={false}
/>
</div>
</div>
))}
</div>
</>
) : null;
}}
/>
</>
) : (
<ConnectorMultiSelect
</div>
</>
) : (
<div>
<h2 className="mb-1 font-medium text-base">
Pick your connectors:
</h2>
<p className="mb-3 text-xs">
All documents indexed by the selected connectors will be a
part of this document set.
</p>
<FieldArray
name="cc_pair_ids"
label="Pick your connectors"
connectors={visibleCcPairs}
selectedIds={props.values.cc_pair_ids}
onChange={(selectedIds) => {
props.setFieldValue("cc_pair_ids", selectedIds);
}}
placeholder="Search for connectors..."
render={(arrayHelpers: ArrayHelpers) => (
<div className="mb-3 flex gap-2 flex-wrap">
{ccPairs.map((ccPair) => {
const ind = props.values.cc_pair_ids.indexOf(
ccPair.cc_pair_id
);
const isSelected = ind !== -1;
return (
<div
key={`${ccPair.connector.id}-${ccPair.credential.id}`}
className={
`
px-3
py-1
rounded-lg
border
border-border
w-fit
flex
cursor-pointer ` +
(isSelected
? " bg-background-200"
: " hover:bg-accent-background-hovered")
}
onClick={() => {
if (isSelected) {
arrayHelpers.remove(ind);
} else {
arrayHelpers.push(ccPair.cc_pair_id);
}
}}
>
<div className="my-auto">
<ConnectorTitle
connector={ccPair.connector}
ccPairId={ccPair.cc_pair_id}
ccPairName={ccPair.name}
isLink={false}
showMetadata={false}
/>
</div>
</div>
);
})}
</div>
)}
/>
)}
</div>
</div>
)}
<div className="flex mt-6 pt-4 border-t border-neutral-200">
<div className="flex mt-6">
<Button
type="submit"
variant="submit"
disabled={props.isSubmitting}
className="w-56 mx-auto py-1.5 h-auto text-sm"
className="w-64 mx-auto"
>
{isUpdate ? "Update Document Set" : "Create Document Set"}
{isUpdate ? "Update!" : "Create!"}
</Button>
</div>
</Form>

View File

@@ -15,7 +15,6 @@ import {
} from "./interfaces";
import { FiExternalLink } from "react-icons/fi";
import {
AmazonIcon,
CohereIcon,
LiteLLMIcon,
MixedBreadIcon,
@@ -243,11 +242,6 @@ const RerankingDetailsForm = forwardRef<
card.rerank_provider_type == RerankerProvider.COHERE
) {
setIsApiKeyModalOpen(true);
} else if (
card.rerank_provider_type ==
RerankerProvider.BEDROCK
) {
setIsApiKeyModalOpen(true);
} else if (
card.rerank_provider_type ==
RerankerProvider.LITELLM
@@ -284,9 +278,6 @@ const RerankingDetailsForm = forwardRef<
) : card.rerank_provider_type ===
RerankerProvider.COHERE ? (
<CohereIcon size={24} className="mr-2" />
) : card.rerank_provider_type ===
RerankerProvider.BEDROCK ? (
<AmazonIcon size={24} className="mr-2" />
) : (
<MixedBreadIcon size={24} className="mr-2" />
)}
@@ -446,10 +437,7 @@ const RerankingDetailsForm = forwardRef<
placeholder={
values.rerank_api_key
? "*".repeat(values.rerank_api_key.length)
: values.rerank_provider_type ===
RerankerProvider.BEDROCK
? "aws_ACCESSKEY_SECRETKEY_REGION"
: "Enter your API key"
: undefined
}
onChange={(e: React.ChangeEvent<HTMLInputElement>) => {
const value = e.target.value;
@@ -460,12 +448,7 @@ const RerankingDetailsForm = forwardRef<
setFieldValue("api_key", value);
}}
type="password"
label={
values.rerank_provider_type ===
RerankerProvider.BEDROCK
? "AWS Credentials in format: aws_ACCESSKEY_SECRETKEY_REGION"
: "Cohere API Key"
}
label="Cohere API Key"
name="rerank_api_key"
/>
<div className="flex w-full justify-end mt-4">

View File

@@ -18,7 +18,6 @@ export interface RerankingDetails {
export enum RerankerProvider {
COHERE = "cohere",
LITELLM = "litellm",
BEDROCK = "bedrock",
}
export enum EmbeddingPrecision {
@@ -101,15 +100,6 @@ export const rerankingModels: RerankingModel[] = [
description: "Powerful multilingual reranking model.",
link: "https://docs.cohere.com/v2/reference/rerank",
},
{
cloud: true,
rerank_provider_type: RerankerProvider.BEDROCK,
modelName: "cohere.rerank-v3-5:0",
displayName: "Cohere Rerank 3.5",
description:
"Powerful multilingual reranking model invoked through AWS Bedrock.",
link: "https://aws.amazon.com/blogs/machine-learning/cohere-rerank-3-5-is-now-available-in-amazon-bedrock-through-rerank-api",
},
];
export const getCurrentModelCopy = (

View File

@@ -26,8 +26,6 @@ import {
FiUnlock,
FiRefreshCw,
FiPauseCircle,
FiFilter,
FiX,
} from "react-icons/fi";
import {
Tooltip,
@@ -43,7 +41,6 @@ import Cookies from "js-cookie";
import { TOGGLED_CONNECTORS_COOKIE_NAME } from "@/lib/constants";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { ConnectorCredentialPairStatus } from "../../connector/[ccPairId]/types";
import { FilterComponent, FilterOptions } from "./FilterComponent";
function SummaryRow({
source,
@@ -288,26 +285,7 @@ export function CCPairIndexingStatusTable({
return savedState ? JSON.parse(savedState) : {};
});
const [filterOptions, setFilterOptions] = useState<FilterOptions>({
accessType: null,
docsCountFilter: {
operator: null,
value: null,
},
lastStatus: null,
});
// Reference to the FilterComponent for resetting its state
const filterComponentRef = useRef<{
resetFilters: () => void;
} | null>(null);
const {
groupedStatuses,
sortedSources,
groupSummaries,
filteredGroupedStatuses,
} = useMemo(() => {
const { groupedStatuses, sortedSources, groupSummaries } = useMemo(() => {
const grouped: Record<ValidSources, ConnectorIndexingStatus<any, any>[]> =
{} as Record<ValidSources, ConnectorIndexingStatus<any, any>[]>;
@@ -359,139 +337,12 @@ export function CCPairIndexingStatusTable({
};
});
// Apply filters to create filtered grouped statuses
const filteredGrouped: Record<
ValidSources,
ConnectorIndexingStatus<any, any>[]
> = {} as Record<ValidSources, ConnectorIndexingStatus<any, any>[]>;
sorted.forEach((source) => {
const statuses = grouped[source];
// Apply filters
const filteredStatuses = statuses.filter((status) => {
// Filter by access type
if (filterOptions.accessType && filterOptions.accessType.length > 0) {
if (!filterOptions.accessType.includes(status.access_type)) {
return false;
}
}
// Filter by last status
if (filterOptions.lastStatus && filterOptions.lastStatus.length > 0) {
if (
!filterOptions.lastStatus.includes(
status.last_finished_status as any
)
) {
return false;
}
}
// Filter by docs count
if (filterOptions.docsCountFilter.operator) {
const { operator, value } = filterOptions.docsCountFilter;
// If only operator is selected (no value), show all
if (value === null) {
return true;
}
if (operator === ">" && !(status.docs_indexed > value)) {
return false;
} else if (operator === "<" && !(status.docs_indexed < value)) {
return false;
} else if (operator === "=" && status.docs_indexed !== value) {
return false;
}
}
return true;
});
if (filteredStatuses.length > 0) {
filteredGrouped[source] = filteredStatuses;
}
});
return {
groupedStatuses: grouped,
sortedSources: sorted,
groupSummaries: summaries,
filteredGroupedStatuses: filteredGrouped,
};
}, [ccPairsIndexingStatuses, editableCcPairsIndexingStatuses, filterOptions]);
// Determine which sources to display based on filters and search
const displaySources = useMemo(() => {
const hasActiveFilters =
(filterOptions.accessType && filterOptions.accessType.length > 0) ||
(filterOptions.lastStatus && filterOptions.lastStatus.length > 0) ||
filterOptions.docsCountFilter.operator !== null;
if (hasActiveFilters) {
return Object.keys(filteredGroupedStatuses) as ValidSources[];
}
return sortedSources;
}, [sortedSources, filteredGroupedStatuses, filterOptions]);
const handleFilterChange = (newFilters: FilterOptions) => {
setFilterOptions(newFilters);
// Auto-expand sources when filters are applied
if (
(newFilters.accessType && newFilters.accessType.length > 0) ||
(newFilters.lastStatus && newFilters.lastStatus.length > 0) ||
newFilters.docsCountFilter.operator !== null
) {
// We need to wait for the filteredGroupedStatuses to be updated
// before we can expand the sources
setTimeout(() => {
const sourcesToExpand = Object.keys(
filteredGroupedStatuses
) as ValidSources[];
const newConnectorsToggled = { ...connectorsToggled };
sourcesToExpand.forEach((source) => {
newConnectorsToggled[source] = true;
});
setConnectorsToggled(newConnectorsToggled);
Cookies.set(
TOGGLED_CONNECTORS_COOKIE_NAME,
JSON.stringify(newConnectorsToggled)
);
}, 0);
}
};
const clearAllFilters = () => {
const emptyFilters: FilterOptions = {
accessType: null,
docsCountFilter: {
operator: null,
value: null,
},
lastStatus: null,
};
setFilterOptions(emptyFilters);
// Reset the FilterComponent's internal state
if (filterComponentRef.current) {
filterComponentRef.current.resetFilters();
}
};
// Check if filters are active
const hasActiveFilters = useMemo(() => {
return (
(filterOptions.accessType && filterOptions.accessType.length > 0) ||
(filterOptions.lastStatus && filterOptions.lastStatus.length > 0) ||
filterOptions.docsCountFilter.operator !== null
);
}, [filterOptions]);
}, [ccPairsIndexingStatuses, editableCcPairsIndexingStatuses]);
const toggleSource = (
source: ValidSources,
@@ -525,194 +376,127 @@ export function CCPairIndexingStatusTable({
sortedSources.length;
return (
<>
<Table>
<TableHeader>
<ConnectorRow
invisible
ccPairsIndexingStatus={{
cc_pair_id: 1,
<Table>
<TableHeader>
<ConnectorRow
invisible
ccPairsIndexingStatus={{
cc_pair_id: 1,
name: "Sample File Connector",
cc_pair_status: ConnectorCredentialPairStatus.ACTIVE,
last_status: "success",
connector: {
name: "Sample File Connector",
cc_pair_status: ConnectorCredentialPairStatus.ACTIVE,
last_status: "success",
connector: {
name: "Sample File Connector",
source: ValidSources.File,
input_type: "poll",
connector_specific_config: {
file_locations: ["/path/to/sample/file.txt"],
},
refresh_freq: 86400,
prune_freq: null,
indexing_start: new Date("2023-07-01T12:00:00Z"),
id: 1,
credential_ids: [],
access_type: "public",
time_created: "2023-07-01T12:00:00Z",
time_updated: "2023-07-01T12:00:00Z",
},
credential: {
id: 1,
name: "Sample Credential",
source: ValidSources.File,
user_id: "1",
time_created: "2023-07-01T12:00:00Z",
time_updated: "2023-07-01T12:00:00Z",
credential_json: {},
admin_public: false,
source: ValidSources.File,
input_type: "poll",
connector_specific_config: {
file_locations: ["/path/to/sample/file.txt"],
},
refresh_freq: 86400,
prune_freq: null,
indexing_start: new Date("2023-07-01T12:00:00Z"),
id: 1,
credential_ids: [],
access_type: "public",
docs_indexed: 1000,
last_success: "2023-07-01T12:00:00Z",
last_finished_status: "success",
latest_index_attempt: null,
groups: [], // Add this line
}}
isEditable={false}
/>
</TableHeader>
<div className="flex -mt-12 items-center w-0 m4 gap-x-2">
<input
type="text"
ref={searchInputRef}
placeholder="Search connectors..."
value={searchTerm}
onChange={(e) => setSearchTerm(e.target.value)}
className="ml-1 w-96 h-9 border border-border flex-none rounded-md bg-background-50 px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
/>
time_created: "2023-07-01T12:00:00Z",
time_updated: "2023-07-01T12:00:00Z",
},
credential: {
id: 1,
name: "Sample Credential",
source: ValidSources.File,
user_id: "1",
time_created: "2023-07-01T12:00:00Z",
time_updated: "2023-07-01T12:00:00Z",
credential_json: {},
admin_public: false,
},
access_type: "public",
docs_indexed: 1000,
last_success: "2023-07-01T12:00:00Z",
last_finished_status: "success",
latest_index_attempt: null,
groups: [], // Add this line
}}
isEditable={false}
/>
</TableHeader>
<div className="flex -mt-12 items-center w-0 m4 gap-x-2">
<input
type="text"
ref={searchInputRef}
placeholder="Search connectors..."
value={searchTerm}
onChange={(e) => setSearchTerm(e.target.value)}
className="ml-1 w-96 h-9 border border-border flex-none rounded-md bg-background-50 px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
/>
<Button className="h-9" onClick={() => toggleSources()}>
{!shouldExpand ? "Collapse All" : "Expand All"}
</Button>
<div className="flex items-center gap-2">
<FilterComponent
onFilterChange={handleFilterChange}
ref={filterComponentRef}
/>
{hasActiveFilters && (
<div className="flex flex-none items-center gap-1 ml-2 max-w-[500px]">
{filterOptions.accessType &&
filterOptions.accessType.length > 0 && (
<Badge variant="secondary" className="px-2 py-0.5 text-xs">
Access: {filterOptions.accessType.join(", ")}
</Badge>
)}
{filterOptions.lastStatus &&
filterOptions.lastStatus.length > 0 && (
<Badge variant="secondary" className="px-2 py-0.5 text-xs">
Status:{" "}
{filterOptions.lastStatus
.map((s) => s.replace(/_/g, " "))
.join(", ")}
</Badge>
)}
{filterOptions.docsCountFilter.operator &&
filterOptions.docsCountFilter.value !== null && (
<Badge variant="secondary" className="px-2 py-0.5 text-xs">
Docs {filterOptions.docsCountFilter.operator}{" "}
{filterOptions.docsCountFilter.value}
</Badge>
)}
{filterOptions.docsCountFilter.operator &&
filterOptions.docsCountFilter.value === null && (
<Badge variant="secondary" className="px-2 py-0.5 text-xs">
Docs {filterOptions.docsCountFilter.operator} any
</Badge>
)}
<Badge
variant="outline"
className="px-2 py-0.5 text-xs border-red-400 bg-red-100 hover:border-red-600 cursor-pointer hover:bg-red-100 dark:hover:bg-red-900"
onClick={() => {
if (filterComponentRef.current) {
filterComponentRef.current.resetFilters();
setFilterOptions({
accessType: null,
docsCountFilter: {
operator: null,
value: null,
},
lastStatus: null,
});
}
}}
>
<span className="text-red-500 dark:text-red-400">Clear</span>
</Badge>
</div>
)}
</div>
</div>
<TableBody>
{displaySources
.filter(
(source) =>
source != "not_applicable" && source != "ingestion_api"
)
.map((source, ind) => {
const sourceMatches = source
.toLowerCase()
.includes(searchTerm.toLowerCase());
const statuses =
filteredGroupedStatuses[source] || groupedStatuses[source];
const matchingConnectors = statuses.filter((status) =>
<Button className="h-9" onClick={() => toggleSources()}>
{!shouldExpand ? "Collapse All" : "Expand All"}
</Button>
</div>
<TableBody>
{sortedSources
.filter(
(source) => source != "not_applicable" && source != "ingestion_api"
)
.map((source, ind) => {
const sourceMatches = source
.toLowerCase()
.includes(searchTerm.toLowerCase());
const matchingConnectors = groupedStatuses[source].filter(
(status) =>
(status.name || "")
.toLowerCase()
.includes(searchTerm.toLowerCase())
);
if (sourceMatches || matchingConnectors.length > 0) {
return (
<React.Fragment key={ind}>
<br className="mt-4" />
<SummaryRow
source={source}
summary={groupSummaries[source]}
isOpen={connectorsToggled[source] || false}
onToggle={() => toggleSource(source)}
/>
{connectorsToggled[source] && (
<>
<TableRow className="border border-border dark:border-neutral-700">
<TableHead>Name</TableHead>
<TableHead>Last Indexed</TableHead>
<TableHead>Activity</TableHead>
{isPaidEnterpriseFeaturesEnabled && (
<TableHead>Permissions</TableHead>
)}
<TableHead>Total Docs</TableHead>
<TableHead>Last Status</TableHead>
<TableHead></TableHead>
</TableRow>
{(sourceMatches ? statuses : matchingConnectors).map(
(ccPairsIndexingStatus) => (
<ConnectorRow
key={ccPairsIndexingStatus.cc_pair_id}
ccPairsIndexingStatus={ccPairsIndexingStatus}
isEditable={editableCcPairsIndexingStatuses.some(
(e) =>
e.cc_pair_id ===
ccPairsIndexingStatus.cc_pair_id
)}
/>
)
);
if (sourceMatches || matchingConnectors.length > 0) {
return (
<React.Fragment key={ind}>
<br className="mt-4" />
<SummaryRow
source={source}
summary={groupSummaries[source]}
isOpen={connectorsToggled[source] || false}
onToggle={() => toggleSource(source)}
/>
{connectorsToggled[source] && (
<>
<TableRow
noHover
className="border ! border-border dark:border-neutral-700"
>
<TableHead>Name</TableHead>
<TableHead>Last Indexed</TableHead>
<TableHead>Activity</TableHead>
{isPaidEnterpriseFeaturesEnabled && (
<TableHead>Permissions</TableHead>
)}
</>
)}
</React.Fragment>
);
}
return null;
})}
</TableBody>
</Table>
</>
<TableHead>Total Docs</TableHead>
<TableHead>Last Status</TableHead>
<TableHead></TableHead>
</TableRow>
{(sourceMatches
? groupedStatuses[source]
: matchingConnectors
).map((ccPairsIndexingStatus) => (
<ConnectorRow
key={ccPairsIndexingStatus.cc_pair_id}
ccPairsIndexingStatus={ccPairsIndexingStatus}
isEditable={editableCcPairsIndexingStatuses.some(
(e) =>
e.cc_pair_id === ccPairsIndexingStatus.cc_pair_id
)}
/>
))}
</>
)}
</React.Fragment>
);
}
return null;
})}
</TableBody>
</Table>
);
}
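The filtering logic deleted from this component is easiest to see in isolation. Below is a standalone sketch of the docs-count predicate, using only the FilterOptions shape visible in this diff; the helper name is hypothetical.

// Sketch of the docs-count check removed above: an operator with no value
// matches everything, as does the absence of any operator.
type DocsCountFilterSketch = {
  operator: ">" | "<" | "=" | null;
  value: number | null;
};

function matchesDocsCount(
  docsIndexed: number,
  filter: DocsCountFilterSketch
): boolean {
  const { operator, value } = filter;
  if (operator === null || value === null) return true; // incomplete filter: show all
  if (operator === ">") return docsIndexed > value;
  if (operator === "<") return docsIndexed < value;
  return docsIndexed === value; // "="
}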

View File

@@ -1,375 +0,0 @@
"use client";
import React, { useState, useImperativeHandle, forwardRef } from "react";
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuGroup,
DropdownMenuItem,
DropdownMenuLabel,
DropdownMenuSeparator,
DropdownMenuTrigger,
DropdownMenuCheckboxItem,
DropdownMenuRadioGroup,
DropdownMenuRadioItem,
} from "@/components/ui/dropdown-menu";
import { SortIcon } from "@/components/icons/icons";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Badge } from "@/components/ui/badge";
import { AccessType, ValidStatuses } from "@/lib/types";
import { FiFilter, FiX, FiCheck } from "react-icons/fi";
export interface FilterOptions {
accessType: AccessType[] | null;
docsCountFilter: {
operator: ">" | "<" | "=" | null;
value: number | null;
};
lastStatus: ValidStatuses[] | null;
}
interface FilterComponentProps {
onFilterChange: (filters: FilterOptions) => void;
}
export const FilterComponent = forwardRef<
{ resetFilters: () => void },
FilterComponentProps
>(({ onFilterChange }, ref) => {
const [isOpen, setIsOpen] = useState(false);
const [filters, setFilters] = useState<FilterOptions>({
accessType: null,
docsCountFilter: {
operator: null,
value: null,
},
lastStatus: null,
});
// Local state for tracking selected filters before applying
const [docsOperator, setDocsOperator] = useState<">" | "<" | "=" | null>(
null
);
const [docsValue, setDocsValue] = useState<string>("");
const [selectedAccessTypes, setSelectedAccessTypes] = useState<AccessType[]>(
[]
);
const [selectedStatuses, setSelectedStatuses] = useState<ValidStatuses[]>([]);
// Expose resetFilters method via ref
useImperativeHandle(ref, () => ({
resetFilters: () => {
setDocsOperator(null);
setDocsValue("");
setSelectedAccessTypes([]);
setSelectedStatuses([]);
setFilters({
accessType: null,
docsCountFilter: {
operator: null,
value: null,
},
lastStatus: null,
});
},
}));
const handleAccessTypeChange = (accessType: AccessType) => {
const newAccessTypes = selectedAccessTypes.includes(accessType)
? selectedAccessTypes.filter((type) => type !== accessType)
: [...selectedAccessTypes, accessType];
setSelectedAccessTypes(newAccessTypes);
};
const handleStatusChange = (status: ValidStatuses) => {
const newStatuses = selectedStatuses.includes(status)
? selectedStatuses.filter((s) => s !== status)
: [...selectedStatuses, status];
setSelectedStatuses(newStatuses);
};
const handleDocsFilterChange = () => {
if (docsOperator && docsValue) {
const newFilters = {
...filters,
accessType: selectedAccessTypes.length > 0 ? selectedAccessTypes : null,
lastStatus: selectedStatuses.length > 0 ? selectedStatuses : null,
docsCountFilter: {
operator: docsOperator,
value: parseInt(docsValue),
},
};
setFilters(newFilters);
onFilterChange(newFilters);
setIsOpen(false);
}
};
const applyFilters = () => {
const newFilters = {
...filters,
accessType: selectedAccessTypes.length > 0 ? selectedAccessTypes : null,
lastStatus: selectedStatuses.length > 0 ? selectedStatuses : null,
docsCountFilter: {
operator: docsOperator,
value: docsValue ? parseInt(docsValue) : null,
},
};
setFilters(newFilters);
onFilterChange(newFilters);
setIsOpen(false);
};
const clearFilters = () => {
setSelectedAccessTypes([]);
setSelectedStatuses([]);
setDocsOperator(null);
setDocsValue("");
const newFilters = {
accessType: null,
docsCountFilter: {
operator: null,
value: null,
},
lastStatus: null,
};
setFilters(newFilters);
onFilterChange(newFilters);
};
// Sync local state with filters when dropdown opens
const handleOpenChange = (open: boolean) => {
if (open) {
// When opening, initialize local state from current filters
setSelectedAccessTypes(filters.accessType || []);
setSelectedStatuses(filters.lastStatus || []);
setDocsOperator(filters.docsCountFilter.operator);
setDocsValue(
filters.docsCountFilter.value !== null
? filters.docsCountFilter.value.toString()
: ""
);
}
setIsOpen(open);
};
const hasActiveFilters =
(filters.accessType && filters.accessType.length > 0) ||
(filters.lastStatus && filters.lastStatus.length > 0) ||
filters.docsCountFilter.operator !== null;
// Get active filter count for badge
const getActiveFilterCount = () => {
let count = 0;
if (filters.accessType && filters.accessType.length > 0) count++;
if (filters.lastStatus && filters.lastStatus.length > 0) count++;
if (filters.docsCountFilter.operator !== null) count++;
return count;
};
return (
<div className="relative">
<DropdownMenu open={isOpen} onOpenChange={handleOpenChange}>
<DropdownMenuTrigger asChild>
<Button
variant="outline"
size="sm"
className={`p-2 h-9 ${
hasActiveFilters ? "border-primary bg-primary/5" : ""
}`}
>
<SortIcon size={20} className="text-neutral-800" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent
align="end"
className="w-72"
onCloseAutoFocus={(e) => e.preventDefault()}
>
<div className="flex items-center justify-between px-2 py-1.5">
<DropdownMenuLabel className="text-base font-medium">
Filter Connectors
</DropdownMenuLabel>
</div>
<DropdownMenuSeparator />
<DropdownMenuGroup>
<DropdownMenuLabel className="px-2 py-1.5 text-xs text-muted-foreground">
Access Type
</DropdownMenuLabel>
<div onClick={(e) => e.stopPropagation()}>
<DropdownMenuCheckboxItem
checked={selectedAccessTypes.includes("public")}
onCheckedChange={() => handleAccessTypeChange("public")}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
Public
</DropdownMenuCheckboxItem>
<DropdownMenuCheckboxItem
checked={selectedAccessTypes.includes("private")}
onCheckedChange={() => handleAccessTypeChange("private")}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
Private
</DropdownMenuCheckboxItem>
<DropdownMenuCheckboxItem
checked={selectedAccessTypes.includes("sync")}
onCheckedChange={() => handleAccessTypeChange("sync")}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
Auto-Sync
</DropdownMenuCheckboxItem>
</div>
</DropdownMenuGroup>
<DropdownMenuSeparator />
<DropdownMenuGroup>
<DropdownMenuLabel className="px-2 py-1.5 text-xs text-muted-foreground">
Last Status
</DropdownMenuLabel>
<div onClick={(e) => e.stopPropagation()}>
<DropdownMenuCheckboxItem
checked={selectedStatuses.includes("success")}
onCheckedChange={() => handleStatusChange("success")}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
Success
</DropdownMenuCheckboxItem>
<DropdownMenuCheckboxItem
checked={selectedStatuses.includes("failed")}
onCheckedChange={() => handleStatusChange("failed")}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
Failed
</DropdownMenuCheckboxItem>
<DropdownMenuCheckboxItem
checked={selectedStatuses.includes("in_progress")}
onCheckedChange={() => handleStatusChange("in_progress")}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
In Progress
</DropdownMenuCheckboxItem>
<DropdownMenuCheckboxItem
checked={selectedStatuses.includes("not_started")}
onCheckedChange={() => handleStatusChange("not_started")}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
Not Started
</DropdownMenuCheckboxItem>
<DropdownMenuCheckboxItem
checked={selectedStatuses.includes("completed_with_errors")}
onCheckedChange={() =>
handleStatusChange("completed_with_errors")
}
className="flex items-center justify-between"
onSelect={(e) => e.preventDefault()}
>
Completed with Errors
</DropdownMenuCheckboxItem>
</div>
</DropdownMenuGroup>
<DropdownMenuSeparator />
<DropdownMenuGroup>
<DropdownMenuLabel className="px-2 py-1.5 text-xs text-muted-foreground">
Document Count
</DropdownMenuLabel>
<div
className="flex items-center px-2 py-2 gap-2"
onClick={(e) => e.stopPropagation()}
>
<div className="flex gap-2">
<Button
variant={docsOperator === ">" ? "default" : "outline"}
size="sm"
className="h-8 px-2"
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
setDocsOperator(docsOperator === ">" ? null : ">");
}}
type="button"
>
&gt;
</Button>
<Button
variant={docsOperator === "<" ? "default" : "outline"}
size="sm"
className="h-8 px-2"
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
setDocsOperator(docsOperator === "<" ? null : "<");
}}
type="button"
>
&lt;
</Button>
<Button
variant={docsOperator === "=" ? "default" : "outline"}
size="sm"
className="h-8 px-2"
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
setDocsOperator(docsOperator === "=" ? null : "=");
}}
type="button"
>
=
</Button>
</div>
<Input
type="number"
placeholder="Count"
value={docsValue}
onChange={(e) => setDocsValue(e.target.value)}
className="h-8 w-full"
onClick={(e) => e.stopPropagation()}
/>
</div>
<div className="px-2 py-1.5">
<Button
size="sm"
className="w-full h-8"
disabled={false}
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
applyFilters();
}}
type="button"
>
Apply
</Button>
</div>
</DropdownMenuGroup>
</DropdownMenuContent>
</DropdownMenu>
{hasActiveFilters && (
<div className="absolute -top-1 -right-1">
<Badge className="h-2 bg-red-400 border-red-400 w-2 p-0 border-2 flex items-center justify-center" />
</div>
)}
</div>
);
});
FilterComponent.displayName = "FilterComponent";
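The component above exposes an imperative resetFilters handle via useImperativeHandle, and the table earlier in this diff invokes it through a ref. A minimal usage sketch with a hypothetical parent (not repository code):

// Hypothetical parent mirroring how CCPairIndexingStatusTable (earlier in
// this diff) drives the handle; <FilterComponent ref={filterRef} ... />
// would be rendered alongside the button.
import React, { useRef } from "react";

function FilterToolbarSketch() {
  const filterRef = useRef<{ resetFilters: () => void } | null>(null);
  return (
    <button onClick={() => filterRef.current?.resetFilters()}>
      Clear all filters
    </button>
  );
}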

View File

@@ -26,7 +26,7 @@ export function Checkbox({
onChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
}) {
return (
<label className="flex text-xs cursor-pointer">
<label className="flex text-sm cursor-pointer">
<input
checked={checked}
onChange={onChange}
@@ -34,7 +34,7 @@ export function Checkbox({
className="mr-2 w-3.5 h-3.5 my-auto"
/>
<div>
<Label small>{label}</Label>
<Label>{label}</Label>
{sublabel && <SubLabel>{sublabel}</SubLabel>}
</div>
</label>
@@ -208,7 +208,7 @@ export function SettingsForm() {
}
return (
<div className="flex flex-col pb-8">
<div>
{popup}
<Title className="mb-4">Workspace Settings</Title>
<Checkbox
@@ -307,51 +307,6 @@ export function SettingsForm() {
</Button>
</>
)}
{/* Image Processing Settings */}
<Title className="mt-8 mb-4">Image Processing</Title>
<div className="flex flex-col gap-2">
<Checkbox
label="Enable Image Extraction and Analysis"
sublabel="Extract and analyze images from documents during indexing. This allows the system to process images and create searchable descriptions of them."
checked={settings.image_extraction_and_analysis_enabled ?? false}
onChange={(e) =>
handleToggleSettingsField(
"image_extraction_and_analysis_enabled",
e.target.checked
)
}
/>
<Checkbox
label="Enable Search-time Image Analysis"
sublabel="Analyze images at search time when a user asks about images. This provides more detailed and query-specific image analysis but may increase search-time latency."
checked={settings.search_time_image_analysis_enabled ?? false}
onChange={(e) =>
handleToggleSettingsField(
"search_time_image_analysis_enabled",
e.target.checked
)
}
/>
<IntegerInput
label="Maximum Image Size for Analysis (MB)"
sublabel="Images larger than this size will not be analyzed to prevent excessive resource usage."
value={settings.image_analysis_max_size_mb ?? null}
onChange={(e) => {
const value = e.target.value ? parseInt(e.target.value) : null;
if (value !== null && !isNaN(value) && value > 0) {
updateSettingField([
{ fieldName: "image_analysis_max_size_mb", newValue: value },
]);
}
}}
id="image-analysis-max-size"
placeholder="Enter maximum size in MB"
/>
</div>
</div>
);
}
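The removed IntegerInput handler guards against empty, non-numeric, and non-positive input before persisting the setting. The same validation as a standalone sketch (hypothetical helper name):

// Sketch of the validation performed inline above.
function parsePositiveInt(raw: string): number | null {
  const value = raw ? parseInt(raw, 10) : NaN;
  return !isNaN(value) && value > 0 ? value : null;
}

// parsePositiveInt("12") === 12; parsePositiveInt("") === null;
// parsePositiveInt("-3") === null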

View File

@@ -21,11 +21,6 @@ export interface Settings {
auto_scroll: boolean;
temperature_override_enabled: boolean;
query_history_type: QueryHistoryType;
// Image processing settings
image_extraction_and_analysis_enabled?: boolean;
search_time_image_analysis_enabled?: boolean;
image_analysis_max_size_mb?: number;
}
export enum NotificationType {

View File

@@ -204,6 +204,7 @@ export function ChatPage({
const [documentSidebarVisible, setDocumentSidebarVisible] = useState(false);
const [proSearchEnabled, setProSearchEnabled] = useState(proSearchToggled);
const [streamingAllowed, setStreamingAllowed] = useState(false);
const toggleProSearch = () => {
Cookies.set(
PRO_SEARCH_TOGGLED_COOKIE_NAME,
@@ -1978,6 +1979,8 @@ export function ChatPage({
const innerSidebarElementRef = useRef<HTMLDivElement>(null);
const [settingsToggled, setSettingsToggled] = useState(false);
const [showDeleteAllModal, setShowDeleteAllModal] = useState(false);
const currentPersona = alternativeAssistant || liveAssistant;
const HORIZON_DISTANCE = 800;
@@ -2139,6 +2142,32 @@ export function ChatPage({
<ChatPopup />
{showDeleteAllModal && (
<ConfirmEntityModal
entityType="All Chats"
entityName="all your chat sessions"
onClose={() => setShowDeleteAllModal(false)}
additionalDetails="This action cannot be undone. All your chat sessions will be deleted."
onSubmit={async () => {
const response = await deleteAllChatSessions("Chat");
if (response.ok) {
setShowDeleteAllModal(false);
setPopup({
message: "All your chat sessions have been deleted.",
type: "success",
});
refreshChatSessions();
router.push("/chat");
} else {
setPopup({
message: "Failed to delete all chat sessions.",
type: "error",
});
}
}}
/>
)}
{currentFeedback && (
<FeedbackModal
feedbackType={currentFeedback[0]}
@@ -2296,6 +2325,7 @@ export function ChatPage({
folders={folders}
removeToggle={removeToggle}
showShareModal={showShareModal}
showDeleteAllModal={() => setShowDeleteAllModal(true)}
/>
</div>
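The modal added here wraps the API call in a success/error popup plus a refresh-and-redirect on success. Factored out of the JSX, the flow is roughly the following sketch with injected callbacks; it is not repository code.

// Sketch: the onSubmit flow from the ConfirmEntityModal above,
// dependency-injected so it is testable in isolation.
async function deleteAllAndNotify(
  deleteAll: () => Promise<Response>,
  notify: (message: string, type: "success" | "error") => void,
  onSuccess: () => void
): Promise<void> {
  const response = await deleteAll();
  if (response.ok) {
    notify("All your chat sessions have been deleted.", "success");
    onSuccess(); // close modal, refresh sessions, navigate to /chat
  } else {
    notify("Failed to delete all chat sessions.", "error");
  }
}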

View File

@@ -403,8 +403,7 @@ export function ChatInputBar({
setTabbingIconIndex((tabbingIconIndex) =>
Math.min(
tabbingIconIndex + 1,
// showPrompts ? filteredPrompts.length :
assistantTagOptions.length
showPrompts ? filteredPrompts.length : assistantTagOptions.length
)
);
} else if (e.key === "ArrowUp") {
@@ -437,8 +436,8 @@ export function ChatInputBar({
<button
key={index}
className={`px-2 ${
tabbingIconIndex == index && "bg-neutral-200"
} rounded items-center rounded-lg content-start flex gap-x-1 py-2 w-full hover:bg-neutral-200/90 cursor-pointer`}
tabbingIconIndex == index && "bg-background-dark/75"
} rounded items-center rounded-lg content-start flex gap-x-1 py-2 w-full hover:bg-background-dark/90 cursor-pointer`}
onClick={() => {
updatedTaggedAssistant(currentAssistant);
}}
@@ -460,8 +459,8 @@ export function ChatInputBar({
target="_self"
className={`${
tabbingIconIndex == assistantTagOptions.length &&
"bg-neutral-200"
} rounded rounded-lg px-3 flex gap-x-1 py-2 w-full items-center hover:bg-neutral-200/90 cursor-pointer`}
"bg-background-dark/75"
} rounded rounded-lg px-3 flex gap-x-1 py-2 w-full items-center hover:bg-background-dark/90 cursor-pointer`}
href="/assistants/new"
>
<FiPlus size={17} />

View File

@@ -329,7 +329,7 @@ export async function deleteChatSession(chatSessionId: string) {
return response;
}
export async function deleteAllChatSessions() {
export async function deleteAllChatSessions(sessionType: "Chat" | "Search") {
const response = await fetch(`/api/chat/delete-all-chat-sessions`, {
method: "DELETE",
headers: {

View File

@@ -24,9 +24,6 @@ import { Monitor, Moon, Sun } from "lucide-react";
import { useTheme } from "next-themes";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { FiTrash2 } from "react-icons/fi";
import { deleteAllChatSessions } from "../lib";
import { useChatContext } from "@/components/context/ChatContext";
type SettingsSection = "settings" | "password";
@@ -50,8 +47,6 @@ export function UserSettingsModal({
updateUserShortcuts,
updateUserTemperatureOverrideEnabled,
} = useUser();
const { refreshChatSessions } = useChatContext();
const router = useRouter();
const containerRef = useRef<HTMLDivElement>(null);
const messageRef = useRef<HTMLDivElement>(null);
const { theme, setTheme } = useTheme();
@@ -62,8 +57,6 @@ export function UserSettingsModal({
const [isLoading, setIsLoading] = useState(false);
const [activeSection, setActiveSection] =
useState<SettingsSection>("settings");
const [isDeleteAllLoading, setIsDeleteAllLoading] = useState(false);
const [showDeleteConfirmation, setShowDeleteConfirmation] = useState(false);
useEffect(() => {
const container = containerRef.current;
@@ -139,6 +132,7 @@ export function UserSettingsModal({
);
});
const router = useRouter();
const handleChangedefaultModel = async (defaultModel: string | null) => {
try {
const response = await setUserDefaultModel(defaultModel);
@@ -211,31 +205,6 @@ export function UserSettingsModal({
};
const showPasswordSection = user?.password_configured;
const handleDeleteAllChats = async () => {
setIsDeleteAllLoading(true);
try {
const response = await deleteAllChatSessions();
if (response.ok) {
setPopup({
message: "All your chat sessions have been deleted.",
type: "success",
});
refreshChatSessions();
router.push("/chat");
} else {
throw new Error("Failed to delete all chat sessions");
}
} catch (error) {
setPopup({
message: "Failed to delete all chat sessions",
type: "error",
});
} finally {
setIsDeleteAllLoading(false);
setShowDeleteConfirmation(false);
}
};
return (
<Modal
onOutsideClick={onClose}
@@ -381,51 +350,6 @@ export function UserSettingsModal({
}}
/>
</div>
<div className="pt-4 border-t border-border">
{!showDeleteConfirmation ? (
<div className="space-y-3">
<p className="text-sm text-neutral-600 ">
This will permanently delete all your chat sessions and
cannot be undone.
</p>
<Button
variant="destructive"
className="w-full flex items-center justify-center"
onClick={() => setShowDeleteConfirmation(true)}
>
<FiTrash2 className="mr-2" size={14} />
Delete All Chats
</Button>
</div>
) : (
<div className="space-y-3">
<p className="text-sm text-neutral-600 ">
Are you sure you want to delete all your chat sessions?
</p>
<div className="flex gap-2">
<Button
type="button"
variant="destructive"
className="flex-1 flex items-center justify-center"
onClick={handleDeleteAllChats}
disabled={isDeleteAllLoading}
>
{isDeleteAllLoading
? "Deleting..."
: "Yes, Delete All"}
</Button>
<Button
variant="outline"
className="flex-1"
onClick={() => setShowDeleteConfirmation(false)}
disabled={isDeleteAllLoading}
>
Cancel
</Button>
</div>
</div>
)}
</div>
</div>
)}
{activeSection === "password" && (

View File

@@ -64,6 +64,7 @@ interface HistorySidebarProps {
showShareModal?: (chatSession: ChatSession) => void;
showDeleteModal?: (chatSession: ChatSession) => void;
explicitlyUntoggle: () => void;
showDeleteAllModal?: () => void;
setShowAssistantsModal: (show: boolean) => void;
toggleChatSessionSearchModal?: () => void;
}
@@ -182,6 +183,7 @@ export const HistorySidebar = forwardRef<HTMLDivElement, HistorySidebarProps>(
showShareModal,
toggleChatSessionSearchModal,
showDeleteModal,
showDeleteAllModal,
},
ref: ForwardedRef<HTMLDivElement>
) => {
@@ -401,6 +403,7 @@ export const HistorySidebar = forwardRef<HTMLDivElement, HistorySidebarProps>(
existingChats={existingChats}
currentChatId={currentChatId}
folders={folders}
showDeleteAllModal={showDeleteAllModal}
/>
</div>
</div>

View File

@@ -10,6 +10,7 @@ import { Folder } from "../folders/interfaces";
import { usePopup } from "@/components/admin/connectors/Popup";
import { useRouter } from "next/navigation";
import { FiPlus, FiTrash2, FiCheck, FiX } from "react-icons/fi";
import { NEXT_PUBLIC_DELETE_ALL_CHATS_ENABLED } from "@/lib/constants";
import { FolderDropdown } from "../folders/FolderDropdown";
import { ChatSessionDisplay } from "./ChatSessionDisplay";
import { useState, useCallback, useRef, useContext, useEffect } from "react";
@@ -106,6 +107,7 @@ export function PagesTab({
closeSidebar,
showShareModal,
showDeleteModal,
showDeleteAllModal,
toggleChatSessionSearchModal,
}: {
existingChats?: ChatSession[];
@@ -115,6 +117,7 @@ export function PagesTab({
closeSidebar?: () => void;
showShareModal?: (chatSession: ChatSession) => void;
showDeleteModal?: (chatSession: ChatSession) => void;
showDeleteAllModal?: () => void;
}) {
const { setPopup, popup } = usePopup();
const router = useRouter();
@@ -435,7 +438,11 @@ export function PagesTab({
</DndContext>
)}
<div className="pl-4 pr-3">
<div
className={`pl-4 pr-3 ${
NEXT_PUBLIC_DELETE_ALL_CHATS_ENABLED && "pb-20"
}`}
>
{!isHistoryEmpty && (
<>
{Object.entries(groupedChatSesssions)
@@ -472,6 +479,17 @@ export function PagesTab({
</p>
)}
</div>
{showDeleteAllModal && NEXT_PUBLIC_DELETE_ALL_CHATS_ENABLED && (
<div className="absolute w-full border-t border-t-border bg-background-100 bottom-0 left-0 p-4">
<button
className="px-4 w-full py-2 px-4 text-text-600 hover:text-text-800 bg-background-125 border border-border-strong/50 shadow-sm rounded-md transition-colors duration-200 flex items-center justify-center text-sm"
onClick={showDeleteAllModal}
>
<FiTrash2 className="mr-2" size={14} />
Clear All History
</button>
</div>
)}
</div>
);
}

View File

@@ -206,7 +206,7 @@ export function SharedChatDisplay({
{chatSession.description || `Unnamed Chat`}
</h1>
<p className=" text-text-darker">
{humanReadableFormat(chatSession.time_created)}
{humanReadableFormat(chatSession.time_updated)}
</p>
<div
className={`

View File

@@ -1,5 +1,5 @@
import { ConnectorStatus } from "@/lib/types";
import { ConnectorMultiSelect } from "@/components/ConnectorMultiSelect";
import { ConnectorIndexingStatus, ConnectorStatus } from "@/lib/types";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
interface ConnectorEditorProps {
selectedCCPairIds: number[];
@@ -12,20 +12,55 @@ export const ConnectorEditor = ({
setSetCCPairIds,
allCCPairs,
}: ConnectorEditorProps) => {
// Filter out public docs, since they don't make sense as part of a group
const privateCCPairs = allCCPairs.filter(
(ccPair) => ccPair.access_type === "private"
);
return (
<ConnectorMultiSelect
name="connectors"
label="Connectors"
connectors={privateCCPairs}
selectedIds={selectedCCPairIds}
onChange={setSetCCPairIds}
placeholder="Search for connectors..."
showError={true}
/>
<div className="mb-3 flex gap-2 flex-wrap">
{allCCPairs
// remove public docs, since they don't make sense as part of a group
.filter((ccPair) => !(ccPair.access_type === "public"))
.map((ccPair) => {
const ind = selectedCCPairIds.indexOf(ccPair.cc_pair_id);
const isSelected = ind !== -1;
return (
<div
key={`${ccPair.connector.id}-${ccPair.credential.id}`}
className={
`
px-3
py-1
rounded-lg
border
border-border
w-fit
flex
cursor-pointer ` +
(isSelected
? " bg-accent-background-hovered"
: " hover:bg-accent-background")
}
onClick={() => {
if (isSelected) {
setSetCCPairIds(
selectedCCPairIds.filter(
(ccPairId) => ccPairId !== ccPair.cc_pair_id
)
);
} else {
setSetCCPairIds([...selectedCCPairIds, ccPair.cc_pair_id]);
}
}}
>
<div className="my-auto">
<ConnectorTitle
connector={ccPair.connector}
ccPairId={ccPair.cc_pair_id}
ccPairName={ccPair.name}
isLink={false}
showMetadata={false}
/>
</div>
</div>
);
})}
</div>
);
};
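Both the old and new versions of this editor toggle a cc_pair id in and out of the selected list. The toggle as a pure helper (sketch, hypothetical name):

// Sketch of the select/deselect logic shown in both versions above.
function toggleCCPairId(selected: number[], ccPairId: number): number[] {
  return selected.includes(ccPairId)
    ? selected.filter((id) => id !== ccPairId)
    : [...selected, ccPairId];
}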

View File

@@ -1,15 +1,13 @@
import { Button } from "@/components/Button";
import { SearchMultiSelectDropdown } from "@/components/Dropdown";
import { Modal } from "@/components/Modal";
import { useState } from "react";
import { FiX } from "react-icons/fi";
import { FiPlus, FiX } from "react-icons/fi";
import { updateUserGroup } from "./lib";
import { PopupSpec } from "@/components/admin/connectors/Popup";
import { ConnectorStatus, UserGroup } from "@/lib/types";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
import { Connector } from "@/lib/connectors/connectors";
import { ConnectorMultiSelect } from "@/components/ConnectorMultiSelect";
import { Form } from "formik";
interface AddConnectorFormProps {
ccPairs: ConnectorStatus<any, any>[];
userGroup: UserGroup;
@@ -25,68 +23,132 @@ export const AddConnectorForm: React.FC<AddConnectorFormProps> = ({
}) => {
const [selectedCCPairIds, setSelectedCCPairIds] = useState<number[]>([]);
// Filter out ccPairs that are already in the user group and are not private
const availableCCPairs = ccPairs
.filter(
(ccPair) =>
!userGroup.cc_pairs
.map((userGroupCCPair) => userGroupCCPair.id)
.includes(ccPair.cc_pair_id)
)
.filter((ccPair) => ccPair.access_type === "private");
const selectedCCPairs = ccPairs.filter((ccPair) =>
selectedCCPairIds.includes(ccPair.cc_pair_id)
);
return (
<Modal
className="max-w-3xl"
title="Add New Connector"
onOutsideClick={() => onClose()}
>
<div className="px-6 pt-4">
<ConnectorMultiSelect
name="connectors"
label="Select Connectors"
connectors={availableCCPairs}
selectedIds={selectedCCPairIds}
onChange={setSelectedCCPairIds}
placeholder="Search for connectors to add..."
showError={false}
/>
<Modal title="Add New Connector" onOutsideClick={() => onClose()}>
<div className="px-6 pt-4 pb-12">
<div className="mb-2 flex flex-wrap gap-x-2">
{selectedCCPairs.length > 0 &&
selectedCCPairs.map((ccPair) => (
<div
key={ccPair.cc_pair_id}
onClick={() => {
setSelectedCCPairIds(
selectedCCPairIds.filter(
(ccPairId) => ccPairId !== ccPair.cc_pair_id
)
);
}}
className={`
flex
rounded-lg
px-2
py-1
my-1
border
border-border
hover:bg-accent-background-hovered
cursor-pointer`}
>
<ConnectorTitle
ccPairId={ccPair.cc_pair_id}
ccPairName={ccPair.name}
connector={ccPair.connector}
isLink={false}
showMetadata={false}
/>
<FiX className="ml-1 my-auto" />
</div>
))}
</div>
<Button
className="mt-4 flex-nowrap w-48"
onClick={async () => {
const newCCPairIds = [
...Array.from(
new Set(
userGroup.cc_pairs
.map((ccPair) => ccPair.id)
.concat(selectedCCPairIds)
)
),
];
const response = await updateUserGroup(userGroup.id, {
user_ids: userGroup.users.map((user) => user.id),
cc_pair_ids: newCCPairIds,
});
if (response.ok) {
setPopup({
message: "Successfully added connectors to group",
type: "success",
<div className="flex">
<SearchMultiSelectDropdown
options={ccPairs
.filter(
(ccPair) =>
!selectedCCPairIds.includes(ccPair.cc_pair_id) &&
!userGroup.cc_pairs
.map((userGroupCCPair) => userGroupCCPair.id)
.includes(ccPair.cc_pair_id)
)
// remove public and synced docs, since they don't make sense as part of a group
.filter((ccPair) => ccPair.access_type === "private")
.map((ccPair) => {
return {
name: ccPair.name?.toString() || "",
value: ccPair.cc_pair_id?.toString(),
metadata: {
ccPairId: ccPair.cc_pair_id,
connector: ccPair.connector,
},
};
})}
onSelect={(option) => {
setSelectedCCPairIds([
...Array.from(
new Set([
...selectedCCPairIds,
parseInt(option.value as string),
])
),
]);
}}
itemComponent={({ option }) => (
<div className="flex px-4 py-2.5 hover:bg-accent-background-hovered cursor-pointer">
<div className="my-auto">
<ConnectorTitle
ccPairId={option?.metadata?.ccPairId as number}
ccPairName={option.name}
connector={option?.metadata?.connector as Connector<any>}
isLink={false}
showMetadata={false}
/>
</div>
<div className="ml-auto my-auto">
<FiPlus />
</div>
</div>
)}
/>
<Button
className="ml-3 flex-nowrap w-48"
onClick={async () => {
const newCCPairIds = [
...Array.from(
new Set(
userGroup.cc_pairs
.map((ccPair) => ccPair.id)
.concat(selectedCCPairIds)
)
),
];
const response = await updateUserGroup(userGroup.id, {
user_ids: userGroup.users.map((user) => user.id),
cc_pair_ids: newCCPairIds,
});
onClose();
} else {
const responseJson = await response.json();
const errorMsg = responseJson.detail || responseJson.message;
setPopup({
message: `Failed to add connectors to group - ${errorMsg}`,
type: "error",
});
onClose();
}
}}
>
Add Connectors
</Button>
if (response.ok) {
setPopup({
message: "Successfully added users to group",
type: "success",
});
onClose();
} else {
const responseJson = await response.json();
const errorMsg = responseJson.detail || responseJson.message;
setPopup({
message: `Failed to add users to group - ${errorMsg}`,
type: "error",
});
onClose();
}
}}
>
Add Connectors
</Button>
</div>
</div>
</Modal>
);
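Both versions of this form merge the group's existing cc_pair ids with the new selection and deduplicate through a Set before calling updateUserGroup. The merge as a pure helper (sketch):

// Sketch of the id merge used in both versions above.
function mergeCCPairIds(existing: number[], selected: number[]): number[] {
  return Array.from(new Set([...existing, ...selected]));
}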

View File

@@ -1,232 +0,0 @@
import React, { useState, useRef, useEffect } from "react";
import { ConnectorStatus } from "@/lib/types";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
import { X, Search } from "lucide-react";
import { Label } from "@/components/ui/label";
import { ErrorMessage } from "formik";
interface ConnectorMultiSelectProps {
name: string;
label: string;
connectors: ConnectorStatus<any, any>[];
selectedIds: number[];
onChange: (selectedIds: number[]) => void;
disabled?: boolean;
placeholder?: string;
showError?: boolean;
}
export const ConnectorMultiSelect = ({
name,
label,
connectors,
selectedIds,
onChange,
disabled = false,
placeholder = "Search connectors...",
showError = false,
}: ConnectorMultiSelectProps) => {
const [open, setOpen] = useState(false);
const [searchQuery, setSearchQuery] = useState("");
const dropdownRef = useRef<HTMLDivElement>(null);
const inputRef = useRef<HTMLInputElement>(null);
const selectedConnectors = connectors.filter((connector) =>
selectedIds.includes(connector.cc_pair_id)
);
const unselectedConnectors = connectors.filter(
(connector) => !selectedIds.includes(connector.cc_pair_id)
);
const allConnectorsSelected = unselectedConnectors.length === 0;
const filteredUnselectedConnectors = unselectedConnectors.filter(
(connector) => {
const connectorName = connector.name || connector.connector.source;
return connectorName.toLowerCase().includes(searchQuery.toLowerCase());
}
);
useEffect(() => {
if (allConnectorsSelected && open) {
setOpen(false);
inputRef.current?.blur();
setSearchQuery("");
}
}, [allConnectorsSelected, open]);
useEffect(() => {
if (allConnectorsSelected) {
inputRef.current?.blur();
setSearchQuery("");
}
}, [allConnectorsSelected, selectedIds]);
const selectConnector = (connectorId: number) => {
const newSelectedIds = [...selectedIds, connectorId];
onChange(newSelectedIds);
setSearchQuery("");
const willAllBeSelected = connectors.length === newSelectedIds.length;
if (!willAllBeSelected) {
setTimeout(() => {
inputRef.current?.focus();
}, 0);
}
};
const removeConnector = (connectorId: number) => {
onChange(selectedIds.filter((id) => id !== connectorId));
};
useEffect(() => {
const handleClickOutside = (event: MouseEvent) => {
if (
dropdownRef.current &&
!dropdownRef.current.contains(event.target as Node) &&
inputRef.current !== event.target &&
!inputRef.current?.contains(event.target as Node)
) {
setOpen(false);
}
};
document.addEventListener("mousedown", handleClickOutside);
return () => {
document.removeEventListener("mousedown", handleClickOutside);
};
}, []);
const handleKeyDown = (e: React.KeyboardEvent) => {
if (e.key === "Escape") {
setOpen(false);
}
};
const effectivePlaceholder = allConnectorsSelected
? "All connectors selected"
: placeholder;
const isInputDisabled = disabled || allConnectorsSelected;
return (
<div className="flex flex-col w-full space-y-2 mb-4">
{label && <Label className="text-base font-medium">{label}</Label>}
<p className="text-xs text-neutral-500 ">
All documents indexed by the selected connectors will be part of this
document set.
</p>
<div className="relative">
<div
className={`flex items-center border border-input rounded-md border border-neutral-200 ${
allConnectorsSelected ? "bg-neutral-50" : ""
} focus-within:ring-1 focus-within:ring-ring focus-within:border-neutral-400 transition-colors`}
>
<Search className="absolute left-3 h-4 w-4 text-neutral-500" />
<input
ref={inputRef}
type="text"
value={searchQuery}
onChange={(e) => {
setSearchQuery(e.target.value);
setOpen(true);
}}
onFocus={() => {
if (!allConnectorsSelected) {
setOpen(true);
}
}}
onKeyDown={handleKeyDown}
placeholder={effectivePlaceholder}
className={`h-9 w-full pl-9 pr-10 py-2 bg-transparent text-sm outline-none disabled:cursor-not-allowed disabled:opacity-50 ${
allConnectorsSelected ? "text-neutral-500" : ""
}`}
disabled={isInputDisabled}
/>
</div>
{open && !allConnectorsSelected && (
<div
ref={dropdownRef}
className="absolute z-50 w-full mt-1 rounded-md border border-neutral-200 bg-white shadow-md default-scrollbar max-h-[300px] overflow-auto"
>
{filteredUnselectedConnectors.length === 0 ? (
<div className="py-4 text-center text-xs text-neutral-500">
{searchQuery
? "No matching connectors found"
: "No more connectors available"}
</div>
) : (
<div>
{filteredUnselectedConnectors.map((connector) => (
<div
key={connector.cc_pair_id}
className="flex items-center justify-between py-2 px-3 cursor-pointer hover:bg-neutral-50 text-xs"
onClick={() => selectConnector(connector.cc_pair_id)}
>
<div className="flex items-center truncate mr-2">
<ConnectorTitle
connector={connector.connector}
ccPairId={connector.cc_pair_id}
ccPairName={connector.name}
isLink={false}
showMetadata={false}
/>
</div>
</div>
))}
</div>
)}
</div>
)}
</div>
{selectedConnectors.length > 0 ? (
<div className="mt-3 ">
<div className="flex flex-wrap gap-1.5">
{selectedConnectors.map((connector) => (
<div
key={connector.cc_pair_id}
className="flex items-center bg-white rounded-md border border-neutral-300 transition-all px-2 py-1 max-w-full group text-xs"
>
<div className="flex items-center overflow-hidden">
<div className="flex-shrink-0 text-xs">
<ConnectorTitle
connector={connector.connector}
ccPairId={connector.cc_pair_id}
ccPairName={connector.name}
isLink={false}
showMetadata={false}
/>
</div>
</div>
<button
className="ml-1 flex-shrink-0 rounded-full w-4 h-4 flex items-center justify-center bg-neutral-100 text-neutral-500 hover:bg-neutral-200 hover:text-neutral-700 transition-colors group-hover:bg-neutral-200"
onClick={() => removeConnector(connector.cc_pair_id)}
aria-label="Remove connector"
>
<X className="h-2.5 w-2.5" />
</button>
</div>
))}
</div>
</div>
) : (
<div className="mt-3 p-3 border border-dashed border-neutral-300 rounded-md bg-neutral-50 text-neutral-500 text-xs">
No connectors selected. Search and select connectors above.
</div>
)}
{showError && (
<ErrorMessage
name={name}
component="div"
className="text-red-500 text-xs mt-1"
/>
)}
</div>
);
};
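The removed component inlines a click-outside listener to close its dropdown. Extracted as a reusable hook, the same logic would look roughly like this; no such hook appears in this diff, so this is an assumed refactoring sketch.

import { useEffect, RefObject } from "react";

// Sketch: invoke onOutside when a mousedown lands outside every tracked element.
function useClickOutside(
  refs: Array<RefObject<HTMLElement>>,
  onOutside: () => void
): void {
  useEffect(() => {
    const handler = (event: MouseEvent) => {
      const target = event.target as Node;
      const inside = refs.some((r) => r.current?.contains(target));
      if (!inside) onOutside();
    };
    document.addEventListener("mousedown", handler);
    return () => document.removeEventListener("mousedown", handler);
  }, [refs, onOutside]);
}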

View File

@@ -1,52 +0,0 @@
import React from "react";
import { ConnectorStatus } from "@/lib/types";
import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle";
import { Label } from "@/components/ui/label";
import { LockIcon } from "lucide-react";
interface NonSelectableConnectorsProps {
connectors: ConnectorStatus<any, any>[];
title: string;
description: string;
}
export const NonSelectableConnectors = ({
connectors,
title,
description,
}: NonSelectableConnectorsProps) => {
if (connectors.length === 0) {
return null;
}
return (
<div className="mt-6 mb-4">
<Label className="text-base font-medium mb-1">{title}</Label>
<p className="text-xs text-neutral-500 mb-3">{description}</p>
<div className="p-3 border border-dashed border-neutral-300 rounded-md bg-neutral-50">
<div className="text-xs font-medium text-neutral-700 mb-2 flex items-center">
<LockIcon className="h-3.5 w-3.5 mr-1.5 text-neutral-500" />
Unavailable connectors:
</div>
<div className="flex flex-wrap gap-1.5">
{connectors.map((connector) => (
<div
key={`${connector.connector.id}-${connector.credential.id}`}
className="flex items-center px-2 py-1 cursor-not-allowed opacity-80 bg-white border border-neutral-300 rounded-md text-xs"
>
<div className="flex items-center max-w-[200px] text-xs">
<ConnectorTitle
connector={connector.connector}
ccPairId={connector.cc_pair_id}
ccPairName={connector.name}
isLink={false}
showMetadata={false}
/>
</div>
</div>
))}
</div>
</div>
</div>
);
};

View File

@@ -20,7 +20,6 @@ interface ConnectorTitleProps {
owner?: string;
isLink?: boolean;
showMetadata?: boolean;
className?: string;
}
export const ConnectorTitle = ({
@@ -31,7 +30,6 @@ export const ConnectorTitle = ({
isPublic = true,
isLink = true,
showMetadata = true,
className = "",
}: ConnectorTitleProps) => {
const sourceMetadata = getSourceMetadata(connector.source);
@@ -90,17 +88,17 @@ export const ConnectorTitle = ({
);
}
const mainSectionClassName = `text-blue-500 dark:text-blue-100 flex w-fit ${className}`;
const mainSectionClassName = "text-blue-500 dark:text-blue-100 flex w-fit";
const mainDisplay = (
<>
{sourceMetadata.icon({ size: 16 })}
<div className="ml-1 my-auto text-xs font-medium truncate">
{sourceMetadata.icon({ size: 20 })}
<div className="ml-1 my-auto">
{ccPairName || sourceMetadata.displayName}
</div>
</>
);
return (
<div className="my-auto max-w-full">
<div className="my-auto">
{isLink ? (
<Link
className={mainSectionClassName}
@@ -112,10 +110,10 @@ export const ConnectorTitle = ({
<div className={mainSectionClassName}>{mainDisplay}</div>
)}
{showMetadata && additionalMetadata.size > 0 && (
<div className="text-[10px] mt-0.5 text-gray-600 dark:text-gray-400">
<div className="text-xs mt-1">
{Array.from(additionalMetadata.entries()).map(([key, value]) => {
return (
<div key={key} className="truncate">
<div key={key}>
<i>{key}:</i> {value}
</div>
);

View File

@@ -181,7 +181,7 @@ const SignedUpUserTable = ({
: "All Roles"}
</SelectValue>
</SelectTrigger>
<SelectContent className="bg-background">
<SelectContent className="bg-background-50">
{Object.entries(USER_ROLE_LABELS)
.filter(([role]) => role !== UserRole.EXT_PERM_USER)
.map(([role, label]) => (

View File

@@ -41,15 +41,6 @@ export default function TextView({
return markdownFormats.some((format) => mimeType.startsWith(format));
};
const isImageFormat = (mimeType: string) => {
const imageFormats = [
"image/png",
"image/jpeg",
"image/gif",
"image/svg+xml",
];
return imageFormats.some((format) => mimeType.startsWith(format));
};
// Detect if a given MIME type can be rendered in an <iframe>
const isSupportedIframeFormat = (mimeType: string): boolean => {
const supportedFormats = [
@@ -135,7 +126,6 @@ export default function TextView({
<DialogTitle className="text-lg font-medium truncate">
{fileName}
</DialogTitle>
<div className="flex items-center space-x-2">
<Button variant="ghost" size="icon" onClick={handleZoomOut}>
<ZoomOut className="h-4 w-4" />
@@ -171,13 +161,7 @@ export default function TextView({
className="w-full h-full transform origin-center transition-transform duration-300 ease-in-out"
style={{ transform: `scale(${zoom / 100})` }}
>
{isImageFormat(fileType) ? (
<img
src={fileUrl}
alt={fileName}
className="w-full h-full object-contain object-center"
/>
) : isSupportedIframeFormat(fileType) ? (
{isSupportedIframeFormat(fileType) ? (
<iframe
src={`${fileUrl}#toolbar=0`}
className="w-full h-full border-none"

View File

@@ -268,7 +268,7 @@ export const AVAILABLE_CLOUD_PROVIDERS: CloudEmbeddingProvider[] = [
embedding_models: [
{
provider_type: EmbeddingProvider.GOOGLE,
model_name: "text-embedding-005",
model_name: "text-embedding-004",
description: "Google's most recent text embedding model.",
pricePerMillion: 0.025,
model_dim: 768,

View File

@@ -25,8 +25,8 @@ export function mockedRefreshToken(): CustomRefreshTokenResponse {
*/
const mockExp = Date.now() + 3600000; // 1 hour from now in milliseconds
const data: CustomRefreshTokenResponse = {
access_token: "Mock access token",
refresh_token: "Mock refresh token",
access_token: "asdf Mock access token",
refresh_token: "asdf Mock refresh token",
session: { exp: mockExp },
userinfo: {
sub: "Mock email",

View File

@@ -38,7 +38,6 @@ import { SiBookstack } from "react-icons/si";
import Image, { StaticImageData } from "next/image";
import jiraSVG from "../../../public/Jira.svg";
import confluenceSVG from "../../../public/Confluence.svg";
import deepseekSVG from "../../../public/Deepseek.svg";
import openAISVG from "../../../public/Openai.svg";
import amazonSVG from "../../../public/Amazon.svg";
import geminiSVG from "../../../public/Gemini.svg";
@@ -1150,13 +1149,6 @@ export const MetaIcon = ({
className = defaultTailwindCSS,
}: IconProps) => <LogoIcon size={size} className={className} src={metaSVG} />;
export const DeepseekIcon = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => (
<LogoIcon size={size} className={className} src={deepseekSVG} />
);
export const MicrosoftIconSVG = ({
size = 16,
className = defaultTailwindCSS,
@@ -3286,25 +3278,18 @@ export const CirclingArrowIcon = ({
</g>
</g>
</svg>
);
};
export const SortIcon = ({
size = 24,
className = defaultTailwindCSS,
}: IconProps) => {
return (
<svg
style={{ width: `${size}px`, height: `${size}px` }}
className={`w-[${size}px] h-[${size}px] ` + className}
fill="currentColor"
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
>
<path
fill="currentColor"
d="M22 18.605a.75.75 0 0 1-.75.75h-5.1a2.93 2.93 0 0 1-5.66 0H2.75a.75.75 0 1 1 0-1.5h7.74a2.93 2.93 0 0 1 5.66 0h5.1a.75.75 0 0 1 .75.75m0-13.21a.75.75 0 0 1-.75.75H18.8a2.93 2.93 0 0 1-5.66 0H2.75a.75.75 0 1 1 0-1.5h10.39a2.93 2.93 0 0 1 5.66 0h2.45a.74.74 0 0 1 .75.75m0 6.6a.74.74 0 0 1-.75.75H9.55a2.93 2.93 0 0 1-5.66 0H2.75a.75.75 0 1 1 0-1.5h1.14a2.93 2.93 0 0 1 5.66 0h11.7a.75.75 0 0 1 .75.75"
/>
</svg>
// <svg
// style={{ width: `${size}px`, height: `${size}px` }}
// className={`w-[${size}px] h-[${size}px] ` + className}
// viewBox="0 0 112.62 120.72"
// data-name="Layer 1"
// xmlns="http://www.w3.org/2000/svg"
// >
// <path
// strokeWidth={100}
// d="M11.64,100.12l-.4-.47-1.06,8.63a5.08,5.08,0,0,1-1.92,3.41A5.11,5.11,0,0,1,0,107L2.79,84.65v-.07a3.28,3.28,0,0,1,.08-.41h0A5.09,5.09,0,0,1,9,80.39q11.22,2.53,22.42,5.15a5,5,0,0,1,3.17,2.25,5.14,5.14,0,0,1,.64,3.84v0a5,5,0,0,1-2.25,3.16,5.08,5.08,0,0,1-3.83.65c-3.31-.75-6.62-1.52-9.92-2.28a40.71,40.71,0,0,0,2.84,3,50.09,50.09,0,0,0,26.23,13.49,48.67,48.67,0,0,0,14.71.34A47.35,47.35,0,0,0,77,106h0q2.52-1.19,4.83-2.54c1.56-.93,3.07-1.92,4.51-3a50.8,50.8,0,0,0,8.56-7.88,48.92,48.92,0,0,0,6.39-9.45l.56-1.1,10,2.69-.8,1.66a58.64,58.64,0,0,1-7.9,12.24,61.28,61.28,0,0,1-10.81,10.1c-1.68,1.23-3.46,2.4-5.32,3.5s-3.73,2.07-5.74,3a58,58,0,0,1-17,5,58.56,58.56,0,0,1-17.79-.39,60.21,60.21,0,0,1-31.58-16.26c-1.2-1.16-2.26-2.31-3.24-3.45ZM101,20.6l.4.47,1-8.63a5.11,5.11,0,1,1,10.14,1.26l-2.74,22.37,0,.07c0,.13,0,.27-.07.41h0a5.09,5.09,0,0,1-6.08,3.78c-7.47-1.69-15-3.4-22.42-5.15a5,5,0,0,1-3.16-2.25,5.1,5.1,0,0,1-.65-3.84v0a5,5,0,0,1,2.25-3.16,5.1,5.1,0,0,1,3.84-.65c3.31.75,6.61,1.52,9.92,2.28-.84-1-1.77-2-2.84-3.05a50.09,50.09,0,0,0-12.13-8.73A49.49,49.49,0,0,0,64.37,11a48.6,48.6,0,0,0-14.7-.34,47.26,47.26,0,0,0-14,4.1h0q-2.53,1.18-4.83,2.54c-1.57.93-3.07,1.92-4.52,3a50.34,50.34,0,0,0-8.55,7.88,48,48,0,0,0-6.39,9.45l-.57,1.1L.76,36l.8-1.66A58.9,58.9,0,0,1,9.46,22.1,61.63,61.63,0,0,1,20.27,12q2.54-1.85,5.32-3.5c1.81-1.06,3.73-2.07,5.74-3a58,58,0,0,1,17-5A58.56,58.56,0,0,1,66.16.89a59.77,59.77,0,0,1,17,5.74A60.4,60.4,0,0,1,97.75,17.15c1.19,1.16,2.26,2.31,3.24,3.45Z"
// />
// </svg>
);
};

View File

@@ -85,7 +85,7 @@ export const LLMSelector: React.FC<LLMSelectorProps> = ({
<span>{userSettings ? "System Default" : "User Default"}</span>
{userSettings && (
<span className=" my-auto font-normal ml-1">
({defaultModelDisplayName})
({defaultModelDisplayName}) asdf
</span>
)}
</SelectItem>

View File

@@ -1,153 +0,0 @@
"use client";
import * as React from "react";
import { type DialogProps } from "@radix-ui/react-dialog";
import { Command as CommandPrimitive } from "cmdk";
import { Search } from "lucide-react";
import { cn } from "@/lib/utils";
import { Dialog, DialogContent } from "@/components/ui/dialog";
const Command = React.forwardRef<
React.ElementRef<typeof CommandPrimitive>,
React.ComponentPropsWithoutRef<typeof CommandPrimitive>
>(({ className, ...props }, ref) => (
<CommandPrimitive
ref={ref}
className={cn(
"flex h-full w-full flex-col overflow-hidden rounded-md bg-white text-neutral-950 dark:bg-neutral-950 dark:text-neutral-50",
className
)}
{...props}
/>
));
Command.displayName = CommandPrimitive.displayName;
const CommandDialog = ({ children, ...props }: DialogProps) => {
return (
<Dialog {...props}>
<DialogContent className="overflow-hidden p-0 shadow-lg">
<Command className="[&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:font-medium [&_[cmdk-group-heading]]:text-neutral-500 [&_[cmdk-group]:not([hidden])_~[cmdk-group]]:pt-0 [&_[cmdk-group]]:px-2 [&_[cmdk-input-wrapper]_svg]:h-5 [&_[cmdk-input-wrapper]_svg]:w-5 [&_[cmdk-input]]:h-12 [&_[cmdk-item]]:px-2 [&_[cmdk-item]]:py-3 [&_[cmdk-item]_svg]:h-5 [&_[cmdk-item]_svg]:w-5 dark:[&_[cmdk-group-heading]]:text-neutral-400">
{children}
</Command>
</DialogContent>
</Dialog>
);
};
const CommandInput = React.forwardRef<
React.ElementRef<typeof CommandPrimitive.Input>,
React.ComponentPropsWithoutRef<typeof CommandPrimitive.Input>
>(({ className, ...props }, ref) => (
<div className="flex items-center border-b px-3" cmdk-input-wrapper="">
<Search className="mr-2 h-4 w-4 shrink-0 opacity-50" />
<CommandPrimitive.Input
ref={ref}
className={cn(
"flex h-11 w-full rounded-md bg-transparent py-3 text-sm outline-none placeholder:text-neutral-500 disabled:cursor-not-allowed disabled:opacity-50 dark:placeholder:text-neutral-400",
className
)}
{...props}
/>
</div>
));
CommandInput.displayName = CommandPrimitive.Input.displayName;
const CommandList = React.forwardRef<
React.ElementRef<typeof CommandPrimitive.List>,
React.ComponentPropsWithoutRef<typeof CommandPrimitive.List>
>(({ className, ...props }, ref) => (
<CommandPrimitive.List
ref={ref}
className={cn("max-h-[300px] overflow-y-auto overflow-x-hidden", className)}
{...props}
/>
));
CommandList.displayName = CommandPrimitive.List.displayName;
const CommandEmpty = React.forwardRef<
React.ElementRef<typeof CommandPrimitive.Empty>,
React.ComponentPropsWithoutRef<typeof CommandPrimitive.Empty>
>((props, ref) => (
<CommandPrimitive.Empty
ref={ref}
className="py-6 text-center text-sm"
{...props}
/>
));
CommandEmpty.displayName = CommandPrimitive.Empty.displayName;
const CommandGroup = React.forwardRef<
React.ElementRef<typeof CommandPrimitive.Group>,
React.ComponentPropsWithoutRef<typeof CommandPrimitive.Group>
>(({ className, ...props }, ref) => (
<CommandPrimitive.Group
ref={ref}
className={cn(
"overflow-hidden p-1 text-neutral-950 [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1.5 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium [&_[cmdk-group-heading]]:text-neutral-500 dark:text-neutral-50 dark:[&_[cmdk-group-heading]]:text-neutral-400",
className
)}
{...props}
/>
));
CommandGroup.displayName = CommandPrimitive.Group.displayName;
const CommandSeparator = React.forwardRef<
React.ElementRef<typeof CommandPrimitive.Separator>,
React.ComponentPropsWithoutRef<typeof CommandPrimitive.Separator>
>(({ className, ...props }, ref) => (
<CommandPrimitive.Separator
ref={ref}
className={cn("-mx-1 h-px bg-neutral-200 dark:bg-neutral-800", className)}
{...props}
/>
));
CommandSeparator.displayName = CommandPrimitive.Separator.displayName;
const CommandItem = React.forwardRef<
React.ElementRef<typeof CommandPrimitive.Item>,
React.ComponentPropsWithoutRef<typeof CommandPrimitive.Item>
>(({ className, ...props }, ref) => (
<CommandPrimitive.Item
ref={ref}
className={cn(
"relative flex cursor-default gap-2 select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none data-[disabled=true]:pointer-events-none data-[selected='true']:bg-neutral-100 data-[selected=true]:text-neutral-900 data-[disabled=true]:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 dark:data-[selected='true']:bg-neutral-800 dark:data-[selected=true]:text-neutral-50",
className
)}
{...props}
/>
));
CommandItem.displayName = CommandPrimitive.Item.displayName;
const CommandShortcut = ({
className,
...props
}: React.HTMLAttributes<HTMLSpanElement>) => {
return (
<span
className={cn(
"ml-auto text-xs tracking-widest text-neutral-500 dark:text-neutral-400",
className
)}
{...props}
/>
);
};
CommandShortcut.displayName = "CommandShortcut";
export {
Command,
CommandDialog,
CommandInput,
CommandList,
CommandEmpty,
CommandGroup,
CommandItem,
CommandShortcut,
CommandSeparator,
};
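For reference, a minimal usage sketch of how these exported pieces compose (the import path, the `open` state, and the item labels are illustrative assumptions, not code from this PR): CommandDialog wraps the palette in a modal, CommandInput captures the query, and cmdk renders the matching CommandItem rows inside CommandList.

// Hypothetical usage sketch — import path, state, and labels are assumptions.
import { useState } from "react";
import {
  CommandDialog,
  CommandInput,
  CommandList,
  CommandEmpty,
  CommandGroup,
  CommandItem,
  CommandShortcut,
} from "@/components/ui/command";

export function ExamplePalette() {
  const [open, setOpen] = useState(false);
  return (
    <CommandDialog open={open} onOpenChange={setOpen}>
      <CommandInput placeholder="Type a command..." />
      <CommandList>
        <CommandEmpty>No results found.</CommandEmpty>
        <CommandGroup heading="Chats">
          {/* onSelect comes from cmdk's CommandPrimitive.Item */}
          <CommandItem onSelect={() => setOpen(false)}>
            New chat
            <CommandShortcut>⌘N</CommandShortcut>
          </CommandItem>
        </CommandGroup>
      </CommandList>
    </CommandDialog>
  );
}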

View File

@@ -82,6 +82,9 @@ export const NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED =
export const NEXT_PUBLIC_TEST_ENV =
process.env.NEXT_PUBLIC_TEST_ENV?.toLowerCase() === "true";
export const NEXT_PUBLIC_DELETE_ALL_CHATS_ENABLED =
process.env.NEXT_PUBLIC_DELETE_ALL_CHATS_ENABLED?.toLowerCase() === "true";
export const NEXT_PUBLIC_ENABLE_CHROME_EXTENSION =
process.env.NEXT_PUBLIC_ENABLE_CHROME_EXTENSION?.toLowerCase() === "true";
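These flags are evaluated once at module load from build-time NEXT_PUBLIC_* env vars, so consumers treat them as plain booleans. A minimal sketch of how the new Chrome-extension flag might gate UI (the component name and import path are illustrative assumptions, not from this PR):

// Hypothetical sketch — `ChromeExtensionPromo` and the import path are assumptions.
import { NEXT_PUBLIC_ENABLE_CHROME_EXTENSION } from "@/lib/constants";

export function ChromeExtensionPromo() {
  // Build-time constant, not a runtime env lookup: Next.js inlines
  // NEXT_PUBLIC_* values at compile time.
  if (!NEXT_PUBLIC_ENABLE_CHROME_EXTENSION) return null;
  return <a href="/chrome-extension">Get the Chrome extension</a>;
}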

View File

@@ -706,10 +706,6 @@ const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
"phi-3.5-mini-instruct": "Phi 3.5 Mini",
"phi-3.5-moe-instruct": "Phi 3.5 MoE",
"phi-3.5-vision-instruct": "Phi 3.5 Vision",
"phi-4": "Phi 4",
// Deepseek Models
"deepseek-r1": "DeepSeek R1",
// Anthropic models
"claude-3-opus-20240229": "Claude 3 Opus",
@@ -726,8 +722,6 @@ const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
"claude-3-5-haiku-20241022": "Claude 3.5 Haiku",
"claude-3-5-haiku@20241022": "Claude 3.5 Haiku",
"claude-3.5-haiku@20241022": "Claude 3.5 Haiku",
"claude-3.7-sonnet@202502019": "Claude 3.7 Sonnet",
"claude-3-7-sonnet-202502019": "Claude 3.7 Sonnet",
// Google Models
"gemini-1.5-pro": "Gemini 1.5 Pro",
@@ -737,16 +731,10 @@ const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
"gemini-1.5-pro-002": "Gemini 1.5 Pro (v2)",
"gemini-1.5-flash-002": "Gemini 1.5 Flash (v2)",
"gemini-2.0-flash-exp": "Gemini 2.0 Flash (Experimental)",
"gemini-2.0-flash-001": "Gemini 2.0 Flash",
"gemini-2.0-flash-lite-preview-02-05": "Gemini 2.0 Flash Lite (Prv)",
"gemini-2.0-flash-thinking-exp-01-02": "Gemini 2.0 Flash Thinking (Exp)",
"gemini-2.0-pro-exp-02-05": "Gemini 2.0 Pro (Exp)",
"gemini-2.0-flash": "Gemini 2.0 Flash",
"gemini-2.0-flash-thinking-exp-01-21": "Gemini 2.0 Flash Thinking",
// Mistral Models
"mistral-large-2411": "Mistral Large 24.11",
"mistral-large@2411": "Mistral Large 24.11",
"ministral-3b": "Ministral 3B",
// Bedrock models
"meta.llama3-1-70b-instruct-v1:0": "Llama 3.1 70B",
@@ -767,8 +755,6 @@ const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
"anthropic.claude-v2:1": "Claude v2.1",
"anthropic.claude-v2": "Claude v2",
"anthropic.claude-v1": "Claude v1",
"anthropic.claude-3-7-sonnet-20250219-v1:0": "Claude 3.7 Sonnet",
"us.anthropic.claude-3-7-sonnet-20250219-v1:0": "Claude 3.7 Sonnet",
"anthropic.claude-3-opus-20240229-v1:0": "Claude 3 Opus",
"anthropic.claude-3-haiku-20240307-v1:0": "Claude 3 Haiku",
"anthropic.claude-3-5-sonnet-20240620-v1:0": "Claude 3.5 Sonnet",
@@ -802,7 +788,6 @@ export const defaultModelsByProvider: { [name: string]: string[] } = {
"anthropic.claude-3-opus-20240229-v1:0",
"mistral.mistral-large-2402-v1:0",
"anthropic.claude-3-5-sonnet-20241022-v2:0",
"anthropic.claude-3-7-sonnet-20250219-v1:0",
],
anthropic: ["claude-3-opus-20240229", "claude-3-5-sonnet-20241022"],
};
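Since several keys (e.g. "phi-4", "deepseek-r1", and the Claude 3.7 variants) are dropped from MODEL_DISPLAY_NAMES above, lookups for those models will no longer hit the map. A minimal sketch of a fallback-to-raw-identifier lookup, assuming such a helper is wanted (it is not shown in this PR):

// Hypothetical helper — falls back to the raw model identifier when the
// display-name map has no entry (e.g. for the models removed above).
function getModelDisplayName(modelName: string): string {
  return MODEL_DISPLAY_NAMES[modelName] ?? modelName;
}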

View File

@@ -90,8 +90,6 @@ const MODEL_NAMES_SUPPORTING_IMAGE_INPUT = [
"anthropic.claude-3-5-sonnet-20240620-v1:0",
"anthropic.claude-3-5-sonnet-20241022-v2:0",
"anthropic.claude-3-7-sonnet-20250219-v1:0",
"claude-3.7-sonnet@202502019",
"claude-3-7-sonnet-202502019",
// google gemini model names
"gemini-1.5-pro",
"gemini-1.5-flash",
@@ -100,8 +98,6 @@ const MODEL_NAMES_SUPPORTING_IMAGE_INPUT = [
"gemini-1.5-pro-002",
"gemini-1.5-flash-002",
"gemini-2.0-flash-exp",
"gemini-2.0-flash-001",
"gemini-2.0-pro-exp-02-05",
// amazon models
"amazon.nova-lite@v1",
"amazon.nova-pro@v1",