improve

2026-02-20 01:05:46 +00:00 · 2025-03-04 11:38:59 -08:00
156 changed files with 1511 additions and 5329 deletions
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -12,40 +12,29 @@ env:
  BUILDKIT_PROGRESS: plain

 jobs:
-
-#   Bypassing this for now as the idea of not building is glitching
-#   releases and builds that depends on everything being tagged in docker
-#   1) Preliminary job to check if the changed files are relevant
-#   check_model_server_changes:
-#     runs-on: ubuntu-latest
-#     outputs:
-#       changed: ${{ steps.check.outputs.changed }}
-#     steps:
-#       - name: Checkout code
-#         uses: actions/checkout@v4
-# 
-#       - name: Check if relevant files changed
-#         id: check
-#         run: |
-#           # Default to "false"
-#           echo "changed=false" >> $GITHUB_OUTPUT
-# 
-#           # Compare the previous commit (github.event.before) to the current one (github.sha)
-#           # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
-#           # set changed=true
-#           if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
-#              | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
-#             echo "changed=true" >> $GITHUB_OUTPUT
-#           fi
-
+  # 1) Preliminary job to check if the changed files are relevant
  check_model_server_changes:
    runs-on: ubuntu-latest
    outputs:
-      changed: "true"
+      changed: ${{ steps.check.outputs.changed }}
    steps:
-      - name: Bypass check and set output
-        run: echo "changed=true" >> $GITHUB_OUTPUT
-        
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Check if relevant files changed
+        id: check
+        run: |
+          # Default to "false"
+          echo "changed=false" >> $GITHUB_OUTPUT
+
+          # Compare the previous commit (github.event.before) to the current one (github.sha)
+          # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
+          # set changed=true
+          if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
+             | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
+            echo "changed=true" >> $GITHUB_OUTPUT
+          fi
+
  build-amd64:
    needs: [check_model_server_changes]
    if: needs.check_model_server_changes.outputs.changed == 'true'
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -1,7 +1,6 @@
 name: Connector Tests

 on:
-  merge_group:
  pull_request:
    branches: [main]
  schedule:
@@ -52,7 +51,7 @@ env:
 jobs:
  connectors-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]

    env:
      PYTHONPATH: ./backend
@@ -77,7 +76,7 @@ jobs:
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
          playwright install chromium
          playwright install-deps chromium
-
+          
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
--- a/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
+++ b/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
@@ -1,125 +0,0 @@
-"""Update GitHub connector repo_name to repositories
-
-Revision ID: 3934b1bc7b62
-Revises: b7c2b63c4a03
-Create Date: 2025-03-05 10:50:30.516962
-
-"""
-from alembic import op
-import sqlalchemy as sa
-import json
-import logging
-
-# revision identifiers, used by Alembic.
-revision = "3934b1bc7b62"
-down_revision = "b7c2b63c4a03"
-branch_labels = None
-depends_on = None
-
-logger = logging.getLogger("alembic.runtime.migration")
-
-
-def upgrade() -> None:
-    # Get all GitHub connectors
-    conn = op.get_bind()
-
-    # First get all GitHub connectors
-    github_connectors = conn.execute(
-        sa.text(
-            """
-            SELECT id, connector_specific_config
-            FROM connector
-            WHERE source = 'GITHUB'
-            """
-        )
-    ).fetchall()
-
-    # Update each connector's config
-    updated_count = 0
-    for connector_id, config in github_connectors:
-        try:
-            if not config:
-                logger.warning(f"Connector {connector_id} has no config, skipping")
-                continue
-
-            # Parse the config if it's a string
-            if isinstance(config, str):
-                config = json.loads(config)
-
-            if "repo_name" not in config:
-                continue
-
-            # Create new config with repositories instead of repo_name
-            new_config = dict(config)
-            repo_name_value = new_config.pop("repo_name")
-            new_config["repositories"] = repo_name_value
-
-            # Update the connector with the new config
-            conn.execute(
-                sa.text(
-                    """
-                    UPDATE connector
-                    SET connector_specific_config = :new_config
-                    WHERE id = :connector_id
-                    """
-                ),
-                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
-            )
-            updated_count += 1
-        except Exception as e:
-            logger.error(f"Error updating connector {connector_id}: {str(e)}")
-
-
-def downgrade() -> None:
-    # Get all GitHub connectors
-    conn = op.get_bind()
-
-    logger.debug(
-        "Starting rollback of GitHub connectors from repositories to repo_name"
-    )
-
-    github_connectors = conn.execute(
-        sa.text(
-            """
-            SELECT id, connector_specific_config
-            FROM connector
-            WHERE source = 'GITHUB'
-            """
-        )
-    ).fetchall()
-
-    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")
-
-    # Revert each GitHub connector to use repo_name instead of repositories
-    reverted_count = 0
-    for connector_id, config in github_connectors:
-        try:
-            if not config:
-                continue
-
-            # Parse the config if it's a string
-            if isinstance(config, str):
-                config = json.loads(config)
-
-            if "repositories" not in config:
-                continue
-
-            # Create new config with repo_name instead of repositories
-            new_config = dict(config)
-            repositories_value = new_config.pop("repositories")
-            new_config["repo_name"] = repositories_value
-
-            # Update the connector with the new config
-            conn.execute(
-                sa.text(
-                    """
-                    UPDATE connector
-                    SET connector_specific_config = :new_config
-                    WHERE id = :connector_id
-                    """
-                ),
-                {"new_config": json.dumps(new_config), "connector_id": connector_id},
-            )
-            reverted_count += 1
-        except Exception as e:
-            logger.error(f"Error reverting connector {connector_id}: {str(e)}")
--- a/backend/ee/onyx/db/query_history.py
+++ b/backend/ee/onyx/db/query_history.py
@@ -134,9 +134,7 @@ def fetch_chat_sessions_eagerly_by_time(
    limit: int | None = 500,
    initial_time: datetime | None = None,
 ) -> list[ChatSession]:
-    """Sorted by oldest to newest, then by message id"""
-
-    asc_time_order: UnaryExpression = asc(ChatSession.time_created)
+    time_order: UnaryExpression = desc(ChatSession.time_created)
    message_order: UnaryExpression = asc(ChatMessage.id)

    filters: list[ColumnElement | BinaryExpression] = [
@@ -149,7 +147,8 @@ def fetch_chat_sessions_eagerly_by_time(
    subquery = (
        db_session.query(ChatSession.id, ChatSession.time_created)
        .filter(*filters)
-        .order_by(asc_time_order)
+        .order_by(ChatSession.id, time_order)
+        .distinct(ChatSession.id)
        .limit(limit)
        .subquery()
    )
@@ -165,7 +164,7 @@ def fetch_chat_sessions_eagerly_by_time(
                ChatMessage.chat_message_feedbacks
            ),
        )
-        .order_by(asc_time_order, message_order)
+        .order_by(time_order, message_order)
    )

    chat_sessions = query.all()
--- a/backend/ee/onyx/db/usage_export.py
+++ b/backend/ee/onyx/db/usage_export.py
@@ -16,20 +16,13 @@ from onyx.db.models import UsageReport
 from onyx.file_store.file_store import get_default_file_store


-# Gets skeletons of all messages in the given range
+# Gets skeletons of all messages in the given period
 def get_empty_chat_messages_entries__paginated(
    db_session: Session,
    period: tuple[datetime, datetime],
    limit: int | None = 500,
    initial_time: datetime | None = None,
 ) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:
-    """Returns a tuple where:
-    first element is the most recent timestamp out of the sessions iterated
-    - this timestamp can be used to paginate forward in time
-    second element is a list of messages belonging to all the sessions iterated
-
-    Only messages of type USER are returned
-    """
    chat_sessions = fetch_chat_sessions_eagerly_by_time(
        start=period[0],
        end=period[1],
@@ -59,17 +52,18 @@ def get_empty_chat_messages_entries__paginated(
    if len(chat_sessions) == 0:
        return None, []

-    return chat_sessions[-1].time_created, message_skeletons
+    return chat_sessions[0].time_created, message_skeletons


 def get_all_empty_chat_message_entries(
    db_session: Session,
    period: tuple[datetime, datetime],
 ) -> Generator[list[ChatMessageSkeleton], None, None]:
-    """period is the range of time over which to fetch messages."""
    initial_time: Optional[datetime] = period[0]
+    ind = 0
    while True:
-        # iterate from oldest to newest
+        ind += 1
+
        time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
            db_session,
            period,
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -15,7 +15,7 @@ from ee.onyx.server.enterprise_settings.api import (
 )
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
 from ee.onyx.server.middleware.tenant_tracking import add_tenant_id_middleware
-from ee.onyx.server.oauth.api import router as ee_oauth_router
+from ee.onyx.server.oauth.api import router as oauth_router
 from ee.onyx.server.query_and_chat.chat_backend import (
    router as chat_router,
 )
@@ -128,7 +128,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
-    include_router_with_global_prefix_prepended(application, ee_oauth_router)
+    include_router_with_global_prefix_prepended(application, oauth_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
--- a/backend/ee/onyx/server/oauth/confluence_cloud.py
+++ b/backend/ee/onyx/server/oauth/confluence_cloud.py
@@ -80,7 +80,6 @@ class ConfluenceCloudOAuth:
        "search:confluence%20"
        # granular scope
        "read:attachment:confluence%20"  # possibly unneeded unless calling v2 attachments api
-        "read:content-details:confluence%20"  # for permission sync
        "offline_access"
    )

--- a/backend/ee/onyx/server/query_history/api.py
+++ b/backend/ee/onyx/server/query_history/api.py
@@ -48,15 +48,10 @@ def fetch_and_process_chat_session_history(
    feedback_type: QAFeedbackType | None,
    limit: int | None = 500,
 ) -> list[ChatSessionSnapshot]:
-    # observed to be slow a scale of 8192 sessions and 4 messages per session
-
-    # this is a little slow (5 seconds)
    chat_sessions = fetch_chat_sessions_eagerly_by_time(
        start=start, end=end, db_session=db_session, limit=limit
    )

-    # this is VERY slow (80 seconds) due to create_chat_chain being called
-    # for each session. Needs optimizing.
    chat_session_snapshots = [
        snapshot_from_chat_session(chat_session=chat_session, db_session=db_session)
        for chat_session in chat_sessions
@@ -251,8 +246,6 @@ def get_query_history_as_csv(
            detail="Query history has been disabled by the administrator.",
        )

-    # this call is very expensive and is timing out via endpoint
-    # TODO: optimize call and/or generate via background task
    complete_chat_session_history = fetch_and_process_chat_session_history(
        db_session=db_session,
        start=start or datetime.fromtimestamp(0, tz=timezone.utc),
--- a/backend/ee/onyx/server/tenants/product_gating.py
+++ b/backend/ee/onyx/server/tenants/product_gating.py
@@ -48,5 +48,4 @@ def store_product_gating(tenant_id: str, application_status: ApplicationStatus)

 def get_gated_tenants() -> set[str]:
    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
-    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
-    return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
+    return cast(set[str], redis_client.smembers(GATED_TENANTS_KEY))
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -55,11 +55,7 @@ logger = logging.getLogger(__name__)
 async def get_or_provision_tenant(
    email: str, referral_source: str | None = None, request: Request | None = None
 ) -> str:
-    """
-    Get existing tenant ID for an email or create a new tenant if none exists.
-    This function should only be called after we have verified we want this user's tenant to exist.
-    It returns the tenant ID associated with the email, creating a new tenant if necessary.
-    """
+    """Get existing tenant ID for an email or create a new tenant if none exists."""
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA

--- a/backend/model_server/constants.py
+++ b/backend/model_server/constants.py
@@ -6,7 +6,7 @@ MODEL_WARM_UP_STRING = "hi " * 512
 DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
 DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
 DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"
-DEFAULT_VERTEX_MODEL = "text-embedding-005"
+DEFAULT_VERTEX_MODEL = "text-embedding-004"


 class EmbeddingModelTextType:
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -5,7 +5,6 @@ from types import TracebackType
 from typing import cast
 from typing import Optional

-import aioboto3  # type: ignore
 import httpx
 import openai
 import vertexai  # type: ignore
@@ -29,13 +28,11 @@ from model_server.constants import DEFAULT_VERTEX_MODEL
 from model_server.constants import DEFAULT_VOYAGE_MODEL
 from model_server.constants import EmbeddingModelTextType
 from model_server.constants import EmbeddingProvider
-from model_server.utils import pass_aws_key
 from model_server.utils import simple_log_function_time
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import API_BASED_EMBEDDING_TIMEOUT
 from shared_configs.configs import INDEXING_ONLY
 from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
-from shared_configs.configs import VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
 from shared_configs.enums import EmbedTextType
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
@@ -185,24 +182,17 @@ class CloudEmbedding:
        vertexai.init(project=project_id, credentials=credentials)
        client = TextEmbeddingModel.from_pretrained(model)

-        inputs = [TextEmbeddingInput(text, embedding_type) for text in texts]
-
-        # Split into batches of 25 texts
-        max_texts_per_batch = VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
-        batches = [
-            inputs[i : i + max_texts_per_batch]
-            for i in range(0, len(inputs), max_texts_per_batch)
-        ]
-
-        # Dispatch all embedding calls asynchronously at once
-        tasks = [
-            client.get_embeddings_async(batch, auto_truncate=True) for batch in batches
-        ]
-
-        # Wait for all tasks to complete in parallel
-        results = await asyncio.gather(*tasks)
-
-        return [embedding.values for batch in results for embedding in batch]
+        embeddings = await client.get_embeddings_async(
+            [
+                TextEmbeddingInput(
+                    text,
+                    embedding_type,
+                )
+                for text in texts
+            ],
+            auto_truncate=True,  # This is the default
+        )
+        return [embedding.values for embedding in embeddings]

    async def _embed_litellm_proxy(
        self, texts: list[str], model_name: str | None
@@ -457,7 +447,7 @@ async def local_rerank(query: str, docs: list[str], model_name: str) -> list[flo
    )


-async def cohere_rerank_api(
+async def cohere_rerank(
    query: str, docs: list[str], model_name: str, api_key: str
 ) -> list[float]:
    cohere_client = CohereAsyncClient(api_key=api_key)
@@ -467,45 +457,6 @@ async def cohere_rerank_api(
    return [result.relevance_score for result in sorted_results]


-async def cohere_rerank_aws(
-    query: str,
-    docs: list[str],
-    model_name: str,
-    region_name: str,
-    aws_access_key_id: str,
-    aws_secret_access_key: str,
-) -> list[float]:
-    session = aioboto3.Session(
-        aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key
-    )
-    async with session.client(
-        "bedrock-runtime", region_name=region_name
-    ) as bedrock_client:
-        body = json.dumps(
-            {
-                "query": query,
-                "documents": docs,
-                "api_version": 2,
-            }
-        )
-        # Invoke the Bedrock model asynchronously
-        response = await bedrock_client.invoke_model(
-            modelId=model_name,
-            accept="application/json",
-            contentType="application/json",
-            body=body,
-        )
-
-        # Read the response asynchronously
-        response_body = json.loads(await response["body"].read())
-
-        # Extract and sort the results
-        results = response_body.get("results", [])
-        sorted_results = sorted(results, key=lambda item: item["index"])
-
-        return [result["relevance_score"] for result in sorted_results]
-
-
 async def litellm_rerank(
    query: str, docs: list[str], api_url: str, model_name: str, api_key: str | None
 ) -> list[float]:
@@ -621,32 +572,15 @@ async def process_rerank_request(rerank_request: RerankRequest) -> RerankRespons
        elif rerank_request.provider_type == RerankerProvider.COHERE:
            if rerank_request.api_key is None:
                raise RuntimeError("Cohere Rerank Requires an API Key")
-            sim_scores = await cohere_rerank_api(
+            sim_scores = await cohere_rerank(
                query=rerank_request.query,
                docs=rerank_request.documents,
                model_name=rerank_request.model_name,
                api_key=rerank_request.api_key,
            )
            return RerankResponse(scores=sim_scores)
-
-        elif rerank_request.provider_type == RerankerProvider.BEDROCK:
-            if rerank_request.api_key is None:
-                raise RuntimeError("Bedrock Rerank Requires an API Key")
-            aws_access_key_id, aws_secret_access_key, aws_region = pass_aws_key(
-                rerank_request.api_key
-            )
-            sim_scores = await cohere_rerank_aws(
-                query=rerank_request.query,
-                docs=rerank_request.documents,
-                model_name=rerank_request.model_name,
-                region_name=aws_region,
-                aws_access_key_id=aws_access_key_id,
-                aws_secret_access_key=aws_secret_access_key,
-            )
-            return RerankResponse(scores=sim_scores)
        else:
            raise ValueError(f"Unsupported provider: {rerank_request.provider_type}")
-
    except Exception as e:
        logger.exception(f"Error during reranking process:\n{str(e)}")
        raise HTTPException(
--- a/backend/model_server/utils.py
+++ b/backend/model_server/utils.py
@@ -70,32 +70,3 @@ def get_gpu_type() -> str:
        return GPUStatus.MAC_MPS

    return GPUStatus.NONE
-
-
-def pass_aws_key(api_key: str) -> tuple[str, str, str]:
-    """Parse AWS API key string into components.
-
-    Args:
-        api_key: String in format 'aws_ACCESSKEY_SECRETKEY_REGION'
-
-    Returns:
-        Tuple of (access_key, secret_key, region)
-
-    Raises:
-        ValueError: If key format is invalid
-    """
-    if not api_key.startswith("aws"):
-        raise ValueError("API key must start with 'aws' prefix")
-
-    parts = api_key.split("_")
-    if len(parts) != 4:
-        raise ValueError(
-            f"API key must be in format 'aws_ACCESSKEY_SECRETKEY_REGION', got {len(parts) - 1} parts"
-            "this is an onyx specific format for formatting the aws secrets for bedrock"
-        )
-
-    try:
-        _, aws_access_key_id, aws_secret_access_key, aws_region = parts
-        return aws_access_key_id, aws_secret_access_key, aws_region
-    except Exception as e:
-        raise ValueError(f"Failed to parse AWS key components: {str(e)}")
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
@@ -31,7 +31,6 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_VALIDATION
 from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
 from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_CHECK
 from onyx.llm.chat_llm import LLMRateLimitError
@@ -93,7 +92,6 @@ def check_sub_answer(
            fast_llm.invoke,
            prompt=msg,
            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK,
-            max_tokens=AGENT_MAX_TOKENS_VALIDATION,
        )

        quality_str: str = cast(str, response.content)
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
@@ -46,7 +46,6 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_SUBANSWER_GENERATION
 from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
 from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION
 from onyx.llm.chat_llm import LLMRateLimitError
@@ -120,7 +119,6 @@ def generate_sub_answer(
            for message in fast_llm.stream(
                prompt=msg,
                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION,
-                max_tokens=AGENT_MAX_TOKENS_SUBANSWER_GENERATION,
            ):
                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
                content = message.content
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
@@ -43,7 +43,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrin
 from onyx.agents.agent_search.shared_graph_utils.operators import (
    dedup_inference_section_list,
 )
-from onyx.agents.agent_search.shared_graph_utils.utils import _should_restrict_tokens
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
@@ -63,7 +62,6 @@ from onyx.chat.models import StreamingError
 from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_ANSWER_GENERATION
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
@@ -155,9 +153,8 @@ def generate_initial_answer(
    )
    for tool_response in yield_search_responses(
        query=question,
-        get_retrieved_sections=lambda: answer_generation_documents.context_documents,
-        get_reranked_sections=lambda: answer_generation_documents.streaming_documents,
-        get_final_context_sections=lambda: answer_generation_documents.context_documents,
+        reranked_sections=answer_generation_documents.streaming_documents,
+        final_context_sections=answer_generation_documents.context_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -281,9 +278,6 @@ def generate_initial_answer(
            for message in model.stream(
                msg,
                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
-                max_tokens=AGENT_MAX_TOKENS_ANSWER_GENERATION
-                if _should_restrict_tokens(model.config)
-                else None,
            ):
                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
                content = message.content
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
@@ -34,7 +34,6 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_SUBQUESTION_GENERATION
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
@@ -142,7 +141,6 @@ def decompose_orig_question(
            model.stream(
                msg,
                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
-                max_tokens=AGENT_MAX_TOKENS_SUBQUESTION_GENERATION,
            ),
            dispatch_subquestion(0, writer),
            sep_callback=dispatch_subquestion_sep(0, writer),
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
@@ -33,7 +33,6 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import RefinedAnswerImprovement
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_VALIDATION
 from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
 from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_COMPARE_ANSWERS
 from onyx.llm.chat_llm import LLMRateLimitError
@@ -113,7 +112,6 @@ def compare_answers(
            model.invoke,
            prompt=msg,
            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS,
-            max_tokens=AGENT_MAX_TOKENS_VALIDATION,
        )

    except (LLMTimeoutError, TimeoutError):
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
@@ -43,7 +43,6 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import StreamingError
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_SUBQUESTION_GENERATION
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
 )
@@ -145,7 +144,6 @@ def create_refined_sub_questions(
            model.stream(
                msg,
                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
-                max_tokens=AGENT_MAX_TOKENS_SUBQUESTION_GENERATION,
            ),
            dispatch_subquestion(1, writer),
            sep_callback=dispatch_subquestion_sep(1, writer),
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
@@ -50,7 +50,13 @@ def decide_refinement_need(
        )
    ]

-    return RequireRefinemenEvalUpdate(
-        require_refined_answer_eval=graph_config.behavior.allow_refinement and decision,
-        log_messages=log_messages,
-    )
+    if graph_config.behavior.allow_refinement:
+        return RequireRefinemenEvalUpdate(
+            require_refined_answer_eval=decision,
+            log_messages=log_messages,
+        )
+    else:
+        return RequireRefinemenEvalUpdate(
+            require_refined_answer_eval=False,
+            log_messages=log_messages,
+        )
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
@@ -21,7 +21,6 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
 )
@@ -97,7 +96,6 @@ def extract_entities_terms(
            fast_llm.invoke,
            prompt=msg,
            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
-            max_tokens=AGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION,
        )

        cleaned_response = (
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
@@ -46,7 +46,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
 from onyx.agents.agent_search.shared_graph_utils.operators import (
    dedup_inference_section_list,
 )
-from onyx.agents.agent_search.shared_graph_utils.utils import _should_restrict_tokens
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
@@ -69,8 +68,6 @@ from onyx.chat.models import StreamingError
 from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_ANSWER_GENERATION
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_VALIDATION
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
@@ -182,9 +179,8 @@ def generate_validate_refined_answer(
    )
    for tool_response in yield_search_responses(
        query=question,
-        get_retrieved_sections=lambda: answer_generation_documents.context_documents,
-        get_reranked_sections=lambda: answer_generation_documents.streaming_documents,
-        get_final_context_sections=lambda: answer_generation_documents.context_documents,
+        reranked_sections=answer_generation_documents.streaming_documents,
+        final_context_sections=answer_generation_documents.context_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -306,11 +302,7 @@ def generate_validate_refined_answer(

    def stream_refined_answer() -> list[str]:
        for message in model.stream(
-            msg,
-            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
-            max_tokens=AGENT_MAX_TOKENS_ANSWER_GENERATION
-            if _should_restrict_tokens(model.config)
-            else None,
+            msg, timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
        ):
            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
            content = message.content
@@ -417,7 +409,6 @@ def generate_validate_refined_answer(
            validation_model.invoke,
            prompt=msg,
            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
-            max_tokens=AGENT_MAX_TOKENS_VALIDATION,
        )
        refined_answer_quality = binary_string_test_after_answer_separator(
            text=cast(str, validation_response.content),
--- a/backend/onyx/agents/agent_search/deep_search/main/operations.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/operations.py
@@ -13,6 +13,7 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
+from onyx.context.search.models import IndexFilters
 from onyx.tools.models import SearchQueryInfo
 from onyx.utils.logger import setup_logger

@@ -143,6 +144,8 @@ def get_query_info(results: list[QueryRetrievalResult]) -> SearchQueryInfo:
        if result.query_info is not None:
            query_info = result.query_info
            break
-
-    assert query_info is not None, "must have query info"
-    return query_info
+    return query_info or SearchQueryInfo(
+        predicted_search=None,
+        final_filters=IndexFilters(access_control_list=None),
+        recency_bias_multiplier=1.0,
+    )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
@@ -33,7 +33,6 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_SUBQUERY_GENERATION
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
 )
@@ -97,7 +96,6 @@ def expand_queries(
            model.stream(
                prompt=msg,
                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
-                max_tokens=AGENT_MAX_TOKENS_SUBQUERY_GENERATION,
            ),
            dispatch_subquery(level, question_num, writer),
        )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/format_results.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/format_results.py
@@ -56,9 +56,8 @@ def format_results(
        relevance_list = relevance_from_docs(reranked_documents)
        for tool_response in yield_search_responses(
            query=state.question,
-            get_retrieved_sections=lambda: reranked_documents,
-            get_reranked_sections=lambda: state.retrieved_documents,
-            get_final_context_sections=lambda: reranked_documents,
+            reranked_sections=state.retrieved_documents,
+            final_context_sections=reranked_documents,
            search_query_info=query_info,
            get_section_relevance=lambda: relevance_list,
            search_tool=graph_config.tooling.search_tool,
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
@@ -91,7 +91,7 @@ def retrieve_documents(
    retrieved_docs = retrieved_docs[:AGENT_MAX_QUERY_RETRIEVAL_RESULTS]

    if AGENT_RETRIEVAL_STATS:
-        pre_rerank_docs = callback_container[0] if callback_container else []
+        pre_rerank_docs = callback_container[0]
        fit_scores = get_fit_scores(
            pre_rerank_docs,
            retrieved_docs,
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
@@ -25,7 +25,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrin
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_VALIDATION
 from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
 from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
 from onyx.llm.chat_llm import LLMRateLimitError
@@ -94,7 +93,6 @@ def verify_documents(
            fast_llm.invoke,
            prompt=msg,
            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION,
-            max_tokens=AGENT_MAX_TOKENS_VALIDATION,
        )

        assert isinstance(response.content, str)
--- a/backend/onyx/agents/agent_search/orchestration/nodes/call_tool.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/call_tool.py
@@ -44,9 +44,7 @@ def call_tool(
    tool = tool_choice.tool
    tool_args = tool_choice.tool_args
    tool_id = tool_choice.id
-    tool_runner = ToolRunner(
-        tool, tool_args, override_kwargs=tool_choice.search_tool_override_kwargs
-    )
+    tool_runner = ToolRunner(tool, tool_args)
    tool_kickoff = tool_runner.kickoff()

    emit_packet(tool_kickoff, writer)
--- a/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
@@ -15,17 +15,8 @@ from onyx.chat.tool_handling.tool_response_handler import get_tool_by_name
 from onyx.chat.tool_handling.tool_response_handler import (
    get_tool_call_for_non_tool_calling_llm_impl,
 )
-from onyx.context.search.preprocessing.preprocessing import query_analysis
-from onyx.context.search.retrieval.search_runner import get_query_embedding
-from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool import Tool
-from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_in_background
-from onyx.utils.threadpool_concurrency import TimeoutThread
-from onyx.utils.threadpool_concurrency import wait_on_background
-from onyx.utils.timing import log_function_time
-from shared_configs.model_server_models import Embedding

 logger = setup_logger()

@@ -34,7 +25,6 @@ logger = setup_logger()
 # and a function that handles extracting the necessary fields
 # from the state and config
 # TODO: fan-out to multiple tool call nodes? Make this configurable?
-@log_function_time(print_only=True)
 def choose_tool(
    state: ToolChoiceState,
    config: RunnableConfig,
@@ -47,31 +37,6 @@ def choose_tool(
    should_stream_answer = state.should_stream_answer

    agent_config = cast(GraphConfig, config["metadata"]["config"])
-
-    force_use_tool = agent_config.tooling.force_use_tool
-
-    embedding_thread: TimeoutThread[Embedding] | None = None
-    keyword_thread: TimeoutThread[tuple[bool, list[str]]] | None = None
-    override_kwargs: SearchToolOverrideKwargs | None = None
-    if (
-        not agent_config.behavior.use_agentic_search
-        and agent_config.tooling.search_tool is not None
-        and (
-            not force_use_tool.force_use or force_use_tool.tool_name == SearchTool.name
-        )
-    ):
-        override_kwargs = SearchToolOverrideKwargs()
-        # Run in a background thread to avoid blocking the main thread
-        embedding_thread = run_in_background(
-            get_query_embedding,
-            agent_config.inputs.search_request.query,
-            agent_config.persistence.db_session,
-        )
-        keyword_thread = run_in_background(
-            query_analysis,
-            agent_config.inputs.search_request.query,
-        )
-
    using_tool_calling_llm = agent_config.tooling.using_tool_calling_llm
    prompt_builder = state.prompt_snapshot or agent_config.inputs.prompt_builder

@@ -82,6 +47,7 @@ def choose_tool(
    tools = [
        tool for tool in (agent_config.tooling.tools or []) if tool.name in state.tools
    ]
+    force_use_tool = agent_config.tooling.force_use_tool

    tool, tool_args = None, None
    if force_use_tool.force_use and force_use_tool.args is not None:
@@ -105,22 +71,11 @@ def choose_tool(
    # If we have a tool and tool args, we are ready to request a tool call.
    # This only happens if the tool call was forced or we are using a non-tool calling LLM.
    if tool and tool_args:
-        if embedding_thread and tool.name == SearchTool._NAME:
-            # Wait for the embedding thread to finish
-            embedding = wait_on_background(embedding_thread)
-            assert override_kwargs is not None, "must have override kwargs"
-            override_kwargs.precomputed_query_embedding = embedding
-        if keyword_thread and tool.name == SearchTool._NAME:
-            is_keyword, keywords = wait_on_background(keyword_thread)
-            assert override_kwargs is not None, "must have override kwargs"
-            override_kwargs.precomputed_is_keyword = is_keyword
-            override_kwargs.precomputed_keywords = keywords
        return ToolChoiceUpdate(
            tool_choice=ToolChoice(
                tool=tool,
                tool_args=tool_args,
                id=str(uuid4()),
-                search_tool_override_kwargs=override_kwargs,
            ),
        )

@@ -143,16 +98,8 @@ def choose_tool(
        # For tool calling LLMs, we want to insert the task prompt as part of this flow, this is because the LLM
        # may choose to not call any tools and just generate the answer, in which case the task prompt is needed.
        prompt=built_prompt,
-        tools=(
-            [tool.tool_definition() for tool in tools] or None
-            if using_tool_calling_llm
-            else None
-        ),
-        tool_choice=(
-            "required"
-            if tools and force_use_tool.force_use and using_tool_calling_llm
-            else None
-        ),
+        tools=[tool.tool_definition() for tool in tools] or None,
+        tool_choice=("required" if tools and force_use_tool.force_use else None),
        structured_response_format=structured_response_format,
    )

@@ -198,22 +145,10 @@ def choose_tool(
    logger.debug(f"Selected tool: {selected_tool.name}")
    logger.debug(f"Selected tool call request: {selected_tool_call_request}")

-    if embedding_thread and selected_tool.name == SearchTool._NAME:
-        # Wait for the embedding thread to finish
-        embedding = wait_on_background(embedding_thread)
-        assert override_kwargs is not None, "must have override kwargs"
-        override_kwargs.precomputed_query_embedding = embedding
-    if keyword_thread and selected_tool.name == SearchTool._NAME:
-        is_keyword, keywords = wait_on_background(keyword_thread)
-        assert override_kwargs is not None, "must have override kwargs"
-        override_kwargs.precomputed_is_keyword = is_keyword
-        override_kwargs.precomputed_keywords = keywords
-
    return ToolChoiceUpdate(
        tool_choice=ToolChoice(
            tool=selected_tool,
            tool_args=selected_tool_call_request["args"],
            id=selected_tool_call_request["id"],
-            search_tool_override_kwargs=override_kwargs,
        ),
    )
--- a/backend/onyx/agents/agent_search/orchestration/nodes/use_tool_response.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/use_tool_response.py
@@ -9,23 +9,18 @@ from onyx.agents.agent_search.basic.states import BasicState
 from onyx.agents.agent_search.basic.utils import process_llm_stream
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContexts
 from onyx.tools.tool_implementations.search.search_tool import (
-    SEARCH_RESPONSE_SUMMARY_ID,
-)
-from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
-from onyx.tools.tool_implementations.search.search_utils import (
-    context_from_inference_section,
+    SEARCH_DOC_CONTENT_ID,
 )
 from onyx.tools.tool_implementations.search_like_tool_utils import (
    FINAL_CONTEXT_DOCUMENTS_ID,
 )
 from onyx.utils.logger import setup_logger
-from onyx.utils.timing import log_function_time

 logger = setup_logger()


-@log_function_time(print_only=True)
 def basic_use_tool_response(
    state: BasicState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> BasicOutput:
@@ -55,13 +50,11 @@ def basic_use_tool_response(
    for yield_item in tool_call_responses:
        if yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID:
            final_search_results = cast(list[LlmDoc], yield_item.response)
-        elif yield_item.id == SEARCH_RESPONSE_SUMMARY_ID:
-            search_response_summary = cast(SearchResponseSummary, yield_item.response)
-            for section in search_response_summary.top_sections:
-                if section.center_chunk.document_id not in initial_search_results:
-                    initial_search_results.append(
-                        context_from_inference_section(section)
-                    )
+        elif yield_item.id == SEARCH_DOC_CONTENT_ID:
+            search_contexts = cast(OnyxContexts, yield_item.response).contexts
+            for doc in search_contexts:
+                if doc.document_id not in initial_search_results:
+                    initial_search_results.append(doc)

    new_tool_call_chunk = AIMessageChunk(content="")
    if not agent_config.behavior.skip_gen_ai_answer_generation:
--- a/backend/onyx/agents/agent_search/orchestration/states.py
+++ b/backend/onyx/agents/agent_search/orchestration/states.py
@@ -2,7 +2,6 @@ from pydantic import BaseModel

 from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot
 from onyx.tools.message import ToolCallSummary
-from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.models import ToolCallFinalResult
 from onyx.tools.models import ToolCallKickoff
 from onyx.tools.models import ToolResponse
@@ -36,7 +35,6 @@ class ToolChoice(BaseModel):
    tool: Tool
    tool_args: dict
    id: str | None
-    search_tool_override_kwargs: SearchToolOverrideKwargs | None = None

    class Config:
        arbitrary_types_allowed = True
--- a/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
@@ -13,11 +13,6 @@ AGENT_NEGATIVE_VALUE_STR = "no"
 AGENT_ANSWER_SEPARATOR = "Answer:"


-EMBEDDING_KEY = "embedding"
-IS_KEYWORD_KEY = "is_keyword"
-KEYWORDS_KEY = "keywords"
-
-
 class AgentLLMErrorType(str, Enum):
    TIMEOUT = "timeout"
    RATE_LIMIT = "rate_limit"
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -42,7 +42,6 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
-from onyx.configs.agent_configs import AGENT_MAX_TOKENS_HISTORY_SUMMARY
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
 )
@@ -62,7 +61,6 @@ from onyx.db.persona import Persona
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.llm.interfaces import LLM
-from onyx.llm.interfaces import LLMConfig
 from onyx.prompts.agent_search import (
    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
 )
@@ -404,7 +402,6 @@ def summarize_history(
            llm.invoke,
            history_context_prompt,
            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
-            max_tokens=AGENT_MAX_TOKENS_HISTORY_SUMMARY,
        )
    except (LLMTimeoutError, TimeoutError):
        logger.error("LLM Timeout Error - summarize history")
@@ -508,9 +505,3 @@ def get_deduplicated_structured_subquestion_documents(
        cited_documents=dedup_inference_section_list(cited_docs),
        context_documents=dedup_inference_section_list(context_docs),
    )
-
-
-def _should_restrict_tokens(llm_config: LLMConfig) -> bool:
-    return not (
-        llm_config.model_provider == "openai" and llm_config.model_name.startswith("o")
-    )
--- a/backend/onyx/auth/users.py
+++ b/backend/onyx/auth/users.py
@@ -587,20 +587,14 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    ) -> Optional[User]:
        email = credentials.username

-        tenant_id: str | None = None
-        try:
-            tenant_id = fetch_ee_implementation_or_noop(
-                "onyx.server.tenants.provisioning",
-                "get_tenant_id_for_email",
-                None,
-            )(
-                email=email,
-            )
-        except Exception as e:
-            logger.warning(
-                f"User attempted to login with invalid credentials: {str(e)}"
-            )
-
+        # Get tenant_id from mapping table
+        tenant_id = await fetch_ee_implementation_or_noop(
+            "onyx.server.tenants.provisioning",
+            "get_or_provision_tenant",
+            async_return_default_schema,
+        )(
+            email=email,
+        )
        if not tenant_id:
            # User not found in mapping
            self.password_helper.hash(credentials.password)
--- a/backend/onyx/background/celery/apps/light.py
+++ b/backend/onyx/background/celery/apps/light.py
@@ -111,6 +111,5 @@ celery_app.autodiscover_tasks(
        "onyx.background.celery.tasks.vespa",
        "onyx.background.celery.tasks.connector_deletion",
        "onyx.background.celery.tasks.doc_permission_syncing",
-        "onyx.background.celery.tasks.indexing",
    ]
 )
--- a/backend/onyx/background/celery/memory_monitoring.py
+++ b/backend/onyx/background/celery/memory_monitoring.py
@@ -1,73 +0,0 @@
-# backend/onyx/background/celery/memory_monitoring.py
-import logging
-import os
-from logging.handlers import RotatingFileHandler
-
-import psutil
-
-from onyx.utils.logger import is_running_in_container
-from onyx.utils.logger import setup_logger
-
-# Regular application logger
-logger = setup_logger()
-
-# Only set up memory monitoring in container environment
-if is_running_in_container():
-    # Set up a dedicated memory monitoring logger
-    MEMORY_LOG_DIR = "/var/log/persisted-logs/memory"
-    MEMORY_LOG_FILE = os.path.join(MEMORY_LOG_DIR, "memory_usage.log")
-    MEMORY_LOG_MAX_BYTES = 10 * 1024 * 1024  # 10MB
-    MEMORY_LOG_BACKUP_COUNT = 5  # Keep 5 backup files
-
-    # Ensure log directory exists
-    os.makedirs(MEMORY_LOG_DIR, exist_ok=True)
-
-    # Create a dedicated logger for memory monitoring
-    memory_logger = logging.getLogger("memory_monitoring")
-    memory_logger.setLevel(logging.INFO)
-
-    # Create a rotating file handler
-    memory_handler = RotatingFileHandler(
-        MEMORY_LOG_FILE,
-        maxBytes=MEMORY_LOG_MAX_BYTES,
-        backupCount=MEMORY_LOG_BACKUP_COUNT,
-    )
-
-    # Create a formatter that includes all relevant information
-    memory_formatter = logging.Formatter(
-        "%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
-    )
-    memory_handler.setFormatter(memory_formatter)
-    memory_logger.addHandler(memory_handler)
-else:
-    # Create a null logger when not in container
-    memory_logger = logging.getLogger("memory_monitoring")
-    memory_logger.addHandler(logging.NullHandler())
-
-
-def emit_process_memory(
-    pid: int, process_name: str, additional_metadata: dict[str, str | int]
-) -> None:
-    # Skip memory monitoring if not in container
-    if not is_running_in_container():
-        return
-
-    try:
-        process = psutil.Process(pid)
-        memory_info = process.memory_info()
-        cpu_percent = process.cpu_percent(interval=0.1)
-
-        # Build metadata string from additional_metadata dictionary
-        metadata_str = " ".join(
-            [f"{key}={value}" for key, value in additional_metadata.items()]
-        )
-        metadata_str = f" {metadata_str}" if metadata_str else ""
-
-        memory_logger.info(
-            f"PROCESS_MEMORY process_name={process_name} pid={pid} "
-            f"rss_mb={memory_info.rss / (1024 * 1024):.2f} "
-            f"vms_mb={memory_info.vms / (1024 * 1024):.2f} "
-            f"cpu={cpu_percent:.2f}{metadata_str}"
-        )
-    except Exception:
-        logger.exception("Error monitoring process memory.")
--- a/backend/onyx/background/celery/tasks/indexing/tasks.py
+++ b/backend/onyx/background/celery/tasks/indexing/tasks.py
@@ -23,7 +23,6 @@ from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_utils import httpx_init_vespa_pool
-from onyx.background.celery.memory_monitoring import emit_process_memory
 from onyx.background.celery.tasks.indexing.utils import get_unfenced_index_attempt_ids
 from onyx.background.celery.tasks.indexing.utils import IndexingCallback
 from onyx.background.celery.tasks.indexing.utils import should_index
@@ -985,9 +984,6 @@ def connector_indexing_proxy_task(
    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    redis_connector_index = redis_connector.new_index(search_settings_id)

-    # Track the last time memory info was emitted
-    last_memory_emit_time = 0.0
-
    try:
        with get_session_with_current_tenant() as db_session:
            index_attempt = get_index_attempt(
@@ -1028,23 +1024,6 @@ def connector_indexing_proxy_task(
                    job.release()
                    break

-            # log the memory usage for tracking down memory leaks / connector-specific memory issues
-            pid = job.process.pid
-            if pid is not None:
-                # Only emit memory info once per minute (60 seconds)
-                current_time = time.monotonic()
-                if current_time - last_memory_emit_time >= 60.0:
-                    emit_process_memory(
-                        pid,
-                        "indexing_worker",
-                        {
-                            "cc_pair_id": cc_pair_id,
-                            "search_settings_id": search_settings_id,
-                            "index_attempt_id": index_attempt_id,
-                        },
-                    )
-                    last_memory_emit_time = current_time
-
            # if a termination signal is detected, break (exit point will clean up)
            if self.request.id and redis_connector_index.terminating(self.request.id):
                task_logger.warning(
@@ -1191,7 +1170,6 @@ def connector_indexing_proxy_task(
    return


-# primary
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,
    soft_time_limit=300,
@@ -1239,7 +1217,6 @@ def check_for_checkpoint_cleanup(*, tenant_id: str) -> None:
                )


-# light worker
@shared_task(
    name=OnyxCeleryTask.CLEANUP_CHECKPOINT,
    bind=True,
--- a/backend/onyx/chat/llm_response_handler.py
+++ b/backend/onyx/chat/llm_response_handler.py
@@ -15,8 +15,6 @@ from onyx.chat.stream_processing.answer_response_handler import (
 from onyx.chat.tool_handling.tool_response_handler import ToolResponseHandler


-# This is Legacy code that is not used anymore.
-# It is kept here for reference.
 class LLMResponseHandlerManager:
    """
    This class is responsible for postprocessing the LLM response stream.
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -756,7 +756,6 @@ def stream_chat_message_objects(
        )

        # LLM prompt building, response capturing, etc.
-
        answer = Answer(
            prompt_builder=prompt_builder,
            is_connected=is_connected,
--- a/backend/onyx/chat/stream_processing/citation_processing.py
+++ b/backend/onyx/chat/stream_processing/citation_processing.py
@@ -90,97 +90,97 @@ class CitationProcessor:
                    next(group for group in citation.groups() if group is not None)
                )

-                if not (1 <= numerical_value <= self.max_citation_num):
-                    continue
-
-                context_llm_doc = self.context_docs[numerical_value - 1]
-                final_citation_num = self.final_order_mapping[
-                    context_llm_doc.document_id
-                ]
-
-                if final_citation_num not in self.citation_order:
-                    self.citation_order.append(final_citation_num)
-
-                citation_order_idx = self.citation_order.index(final_citation_num) + 1
-
-                # get the value that was displayed to user, should always
-                # be in the display_doc_order_dict. But check anyways
-                if context_llm_doc.document_id in self.display_order_mapping:
-                    displayed_citation_num = self.display_order_mapping[
+                if 1 <= numerical_value <= self.max_citation_num:
+                    context_llm_doc = self.context_docs[numerical_value - 1]
+                    final_citation_num = self.final_order_mapping[
                        context_llm_doc.document_id
                    ]
-                else:
-                    displayed_citation_num = final_citation_num
-                    logger.warning(
-                        f"Doc {context_llm_doc.document_id} not in display_doc_order_dict. Used LLM citation number instead."
+
+                    if final_citation_num not in self.citation_order:
+                        self.citation_order.append(final_citation_num)
+
+                    citation_order_idx = (
+                        self.citation_order.index(final_citation_num) + 1
                    )

-                # Skip consecutive citations of the same work
-                if final_citation_num in self.current_citations:
-                    start, end = citation.span()
-                    real_start = length_to_add + start
-                    diff = end - start
-                    self.curr_segment = (
-                        self.curr_segment[: length_to_add + start]
-                        + self.curr_segment[real_start + diff :]
-                    )
-                    length_to_add -= diff
-                    continue
-
-                # Handle edge case where LLM outputs citation itself
-                if self.curr_segment.startswith("[["):
-                    match = re.match(r"\[\[(\d+)\]\]", self.curr_segment)
-                    if match:
-                        try:
-                            doc_id = int(match.group(1))
-                            context_llm_doc = self.context_docs[doc_id - 1]
-                            yield CitationInfo(
-                                # citation_num is now the number post initial ranking, i.e. as displayed to user
-                                citation_num=displayed_citation_num,
-                                document_id=context_llm_doc.document_id,
-                            )
-                        except Exception as e:
-                            logger.warning(
-                                f"Manual LLM citation didn't properly cite documents {e}"
-                            )
+                    # get the value that was displayed to user, should always
+                    # be in the display_doc_order_dict. But check anyways
+                    if context_llm_doc.document_id in self.display_order_mapping:
+                        displayed_citation_num = self.display_order_mapping[
+                            context_llm_doc.document_id
+                        ]
                    else:
+                        displayed_citation_num = final_citation_num
                        logger.warning(
-                            "Manual LLM citation wasn't able to close brackets"
+                            f"Doc {context_llm_doc.document_id} not in display_doc_order_dict. Used LLM citation number instead."
                        )
-                    continue

-                link = context_llm_doc.link
+                    # Skip consecutive citations of the same work
+                    if final_citation_num in self.current_citations:
+                        start, end = citation.span()
+                        real_start = length_to_add + start
+                        diff = end - start
+                        self.curr_segment = (
+                            self.curr_segment[: length_to_add + start]
+                            + self.curr_segment[real_start + diff :]
+                        )
+                        length_to_add -= diff
+                        continue

-                self.past_cite_count = len(self.llm_out)
-                self.current_citations.append(final_citation_num)
+                    # Handle edge case where LLM outputs citation itself
+                    if self.curr_segment.startswith("[["):
+                        match = re.match(r"\[\[(\d+)\]\]", self.curr_segment)
+                        if match:
+                            try:
+                                doc_id = int(match.group(1))
+                                context_llm_doc = self.context_docs[doc_id - 1]
+                                yield CitationInfo(
+                                    # citation_num is now the number post initial ranking, i.e. as displayed to user
+                                    citation_num=displayed_citation_num,
+                                    document_id=context_llm_doc.document_id,
+                                )
+                            except Exception as e:
+                                logger.warning(
+                                    f"Manual LLM citation didn't properly cite documents {e}"
+                                )
+                        else:
+                            logger.warning(
+                                "Manual LLM citation wasn't able to close brackets"
+                            )
+                        continue

-                if citation_order_idx not in self.cited_inds:
-                    self.cited_inds.add(citation_order_idx)
-                    yield CitationInfo(
-                        # citation number is now the one that was displayed to user
-                        citation_num=displayed_citation_num,
-                        document_id=context_llm_doc.document_id,
-                    )
+                    link = context_llm_doc.link

-                start, end = citation.span()
-                if link:
-                    prev_length = len(self.curr_segment)
-                    self.curr_segment = (
-                        self.curr_segment[: start + length_to_add]
-                        + f"[[{displayed_citation_num}]]({link})"  # use the value that was displayed to user
-                        + self.curr_segment[end + length_to_add :]
-                    )
-                    length_to_add += len(self.curr_segment) - prev_length
-                else:
-                    prev_length = len(self.curr_segment)
-                    self.curr_segment = (
-                        self.curr_segment[: start + length_to_add]
-                        + f"[[{displayed_citation_num}]]()"  # use the value that was displayed to user
-                        + self.curr_segment[end + length_to_add :]
-                    )
-                    length_to_add += len(self.curr_segment) - prev_length
+                    self.past_cite_count = len(self.llm_out)
+                    self.current_citations.append(final_citation_num)

-                last_citation_end = end + length_to_add
+                    if citation_order_idx not in self.cited_inds:
+                        self.cited_inds.add(citation_order_idx)
+                        yield CitationInfo(
+                            # citation number is now the one that was displayed to user
+                            citation_num=displayed_citation_num,
+                            document_id=context_llm_doc.document_id,
+                        )
+
+                    start, end = citation.span()
+                    if link:
+                        prev_length = len(self.curr_segment)
+                        self.curr_segment = (
+                            self.curr_segment[: start + length_to_add]
+                            + f"[[{displayed_citation_num}]]({link})"  # use the value that was displayed to user
+                            + self.curr_segment[end + length_to_add :]
+                        )
+                        length_to_add += len(self.curr_segment) - prev_length
+                    else:
+                        prev_length = len(self.curr_segment)
+                        self.curr_segment = (
+                            self.curr_segment[: start + length_to_add]
+                            + f"[[{displayed_citation_num}]]()"  # use the value that was displayed to user
+                            + self.curr_segment[end + length_to_add :]
+                        )
+                        length_to_add += len(self.curr_segment) - prev_length
+
+                    last_citation_end = end + length_to_add

            if last_citation_end > 0:
                result += self.curr_segment[:last_citation_end]
--- a/backend/onyx/configs/agent_configs.py
+++ b/backend/onyx/configs/agent_configs.py
@@ -217,20 +217,20 @@ AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION = int(
 )


-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 6  # in seconds
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 4  # in seconds
 AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
 )

-AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 40  # in seconds
+AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 30  # in seconds
 AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION
 )


-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 10  # in seconds
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 5  # in seconds
 AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION
@@ -243,13 +243,13 @@ AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = int(
 )


-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 15  # in seconds
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 5  # in seconds
 AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
 )

-AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 45  # in seconds
+AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 30  # in seconds
 AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = int(
    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION")
    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION
@@ -333,45 +333,4 @@ AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = int(
    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION
 )

-AGENT_DEFAULT_MAX_TOKENS_VALIDATION = 4
-AGENT_MAX_TOKENS_VALIDATION = int(
-    os.environ.get("AGENT_MAX_TOKENS_VALIDATION") or AGENT_DEFAULT_MAX_TOKENS_VALIDATION
-)
-
-AGENT_DEFAULT_MAX_TOKENS_SUBANSWER_GENERATION = 256
-AGENT_MAX_TOKENS_SUBANSWER_GENERATION = int(
-    os.environ.get("AGENT_MAX_TOKENS_SUBANSWER_GENERATION")
-    or AGENT_DEFAULT_MAX_TOKENS_SUBANSWER_GENERATION
-)
-
-AGENT_DEFAULT_MAX_TOKENS_ANSWER_GENERATION = 1024
-AGENT_MAX_TOKENS_ANSWER_GENERATION = int(
-    os.environ.get("AGENT_MAX_TOKENS_ANSWER_GENERATION")
-    or AGENT_DEFAULT_MAX_TOKENS_ANSWER_GENERATION
-)
-
-AGENT_DEFAULT_MAX_TOKENS_SUBQUESTION_GENERATION = 256
-AGENT_MAX_TOKENS_SUBQUESTION_GENERATION = int(
-    os.environ.get("AGENT_MAX_TOKENS_SUBQUESTION_GENERATION")
-    or AGENT_DEFAULT_MAX_TOKENS_SUBQUESTION_GENERATION
-)
-
-AGENT_DEFAULT_MAX_TOKENS_ENTITY_TERM_EXTRACTION = 1024
-AGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION = int(
-    os.environ.get("AGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION")
-    or AGENT_DEFAULT_MAX_TOKENS_ENTITY_TERM_EXTRACTION
-)
-
-AGENT_DEFAULT_MAX_TOKENS_SUBQUERY_GENERATION = 64
-AGENT_MAX_TOKENS_SUBQUERY_GENERATION = int(
-    os.environ.get("AGENT_MAX_TOKENS_SUBQUERY_GENERATION")
-    or AGENT_DEFAULT_MAX_TOKENS_SUBQUERY_GENERATION
-)
-
-AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY = 128
-AGENT_MAX_TOKENS_HISTORY_SUMMARY = int(
-    os.environ.get("AGENT_MAX_TOKENS_HISTORY_SUMMARY")
-    or AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY
-)
-
 GRAPH_VERSION_NAME: str = "a"
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -640,6 +640,3 @@ TEST_ENV = os.environ.get("TEST_ENV", "").lower() == "true"
 MOCK_LLM_RESPONSE = (
    os.environ.get("MOCK_LLM_RESPONSE") if os.environ.get("MOCK_LLM_RESPONSE") else None
 )
-
-
-DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB = 20
--- a/backend/onyx/configs/llm_configs.py
+++ b/backend/onyx/configs/llm_configs.py
@@ -1,38 +0,0 @@
-from onyx.configs.app_configs import DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB
-from onyx.server.settings.store import load_settings
-
-
-def get_image_extraction_and_analysis_enabled() -> bool:
-    """Get image extraction and analysis enabled setting from workspace settings or fallback to False"""
-    try:
-        settings = load_settings()
-        if settings.image_extraction_and_analysis_enabled is not None:
-            return settings.image_extraction_and_analysis_enabled
-    except Exception:
-        pass
-
-    return False
-
-
-def get_search_time_image_analysis_enabled() -> bool:
-    """Get search time image analysis enabled setting from workspace settings or fallback to False"""
-    try:
-        settings = load_settings()
-        if settings.search_time_image_analysis_enabled is not None:
-            return settings.search_time_image_analysis_enabled
-    except Exception:
-        pass
-
-    return False
-
-
-def get_image_analysis_max_size_mb() -> int:
-    """Get image analysis max size MB setting from workspace settings or fallback to environment variable"""
-    try:
-        settings = load_settings()
-        if settings.image_analysis_max_size_mb is not None:
-            return settings.image_analysis_max_size_mb
-    except Exception:
-        pass
-
-    return DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB
--- a/backend/onyx/connectors/airtable/airtable_connector.py
+++ b/backend/onyx/connectors/airtable/airtable_connector.py
@@ -200,6 +200,7 @@ class AirtableConnector(LoadConnector):
                                        return attachment_response.content

                            logger.error(f"Failed to refresh attachment for {filename}")
+
                        raise

                attachment_content = get_attachment_with_retry(url, record_id)
--- a/backend/onyx/connectors/confluence/connector.py
+++ b/backend/onyx/connectors/confluence/connector.py
@@ -11,12 +11,13 @@ from onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET
 from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
-from onyx.connectors.confluence.onyx_confluence import extract_text_from_confluence_html
+from onyx.connectors.confluence.onyx_confluence import attachment_to_content
+from onyx.connectors.confluence.onyx_confluence import (
+    extract_text_from_confluence_html,
+)
 from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
 from onyx.connectors.confluence.utils import build_confluence_document_id
-from onyx.connectors.confluence.utils import convert_attachment_to_content
 from onyx.connectors.confluence.utils import datetime_from_string
-from onyx.connectors.confluence.utils import process_attachment
 from onyx.connectors.confluence.utils import validate_attachment_filetype
 from onyx.connectors.exceptions import ConnectorValidationError
 from onyx.connectors.exceptions import CredentialExpiredError
@@ -35,26 +36,28 @@ from onyx.connectors.models import ConnectorMissingCredentialError
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
 from onyx.connectors.models import SlimDocument
-from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
+
 # Potential Improvements
-# 1. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost
+# 1. Include attachments, etc
+# 2. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost
+
 _COMMENT_EXPANSION_FIELDS = ["body.storage.value"]
 _PAGE_EXPANSION_FIELDS = [
    "body.storage.value",
    "version",
    "space",
    "metadata.labels",
-    "history.lastUpdated",
 ]
 _ATTACHMENT_EXPANSION_FIELDS = [
    "version",
    "space",
    "metadata.labels",
 ]
+
 _RESTRICTIONS_EXPANSION_FIELDS = [
    "space",
    "restrictions.read.restrictions.user",
@@ -66,6 +69,9 @@ _RESTRICTIONS_EXPANSION_FIELDS = [
 _SLIM_DOC_BATCH_SIZE = 5000

 _ATTACHMENT_EXTENSIONS_TO_FILTER_OUT = [
+    "png",
+    "jpg",
+    "jpeg",
    "gif",
    "mp4",
    "mov",
@@ -81,11 +87,7 @@ _FULL_EXTENSION_FILTER_STRING = "".join(


 class ConfluenceConnector(
-    LoadConnector,
-    PollConnector,
-    SlimConnector,
-    CredentialsConnector,
-    VisionEnabledConnector,
+    LoadConnector, PollConnector, SlimConnector, CredentialsConnector
 ):
    def __init__(
        self,
@@ -103,24 +105,13 @@ class ConfluenceConnector(
        labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
        timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,
    ) -> None:
-        self.wiki_base = wiki_base
-        self.is_cloud = is_cloud
-        self.space = space
-        self.page_id = page_id
-        self.index_recursively = index_recursively
-        self.cql_query = cql_query
        self.batch_size = batch_size
        self.continue_on_failure = continue_on_failure
-        self.labels_to_skip = labels_to_skip
-        self.timezone_offset = timezone_offset
-        self._confluence_client: OnyxConfluence | None = None
-        self._fetched_titles: set[str] = set()
-
-        # Initialize vision LLM using the mixin
-        self.initialize_vision_llm()
+        self.is_cloud = is_cloud

        # Remove trailing slash from wiki_base if present
        self.wiki_base = wiki_base.rstrip("/")
+
        """
        If nothing is provided, we default to fetching all pages
        Only one or none of the following options should be specified so
@@ -162,6 +153,8 @@ class ConfluenceConnector(
            "max_backoff_seconds": 60,
        }

+        self._confluence_client: OnyxConfluence | None = None
+
    @property
    def confluence_client(self) -> OnyxConfluence:
        if self._confluence_client is None:
@@ -191,6 +184,7 @@ class ConfluenceConnector(
        end: SecondsSinceUnixEpoch | None = None,
    ) -> str:
        page_query = self.base_cql_page_query + self.cql_label_filter
+
        # Add time filters
        if start:
            formatted_start_time = datetime.fromtimestamp(
@@ -202,6 +196,7 @@ class ConfluenceConnector(
                "%Y-%m-%d %H:%M"
            )
            page_query += f" and lastmodified <= '{formatted_end_time}'"
+
        return page_query

    def _construct_attachment_query(self, confluence_page_id: str) -> str:
@@ -212,10 +207,11 @@ class ConfluenceConnector(

    def _get_comment_string_for_page_id(self, page_id: str) -> str:
        comment_string = ""
+
        comment_cql = f"type=comment and container='{page_id}'"
        comment_cql += self.cql_label_filter
-        expand = ",".join(_COMMENT_EXPANSION_FIELDS)

+        expand = ",".join(_COMMENT_EXPANSION_FIELDS)
        for comment in self.confluence_client.paginated_cql_retrieval(
            cql=comment_cql,
            expand=expand,
@@ -226,179 +222,123 @@ class ConfluenceConnector(
                confluence_object=comment,
                fetched_titles=set(),
            )
+
        return comment_string

-    def _convert_page_to_document(self, page: dict[str, Any]) -> Document | None:
+    def _convert_object_to_document(
+        self,
+        confluence_object: dict[str, Any],
+        parent_content_id: str | None = None,
+    ) -> Document | None:
        """
-        Converts a Confluence page to a Document object.
-        Includes the page content, comments, and attachments.
-        """
-        try:
-            # Extract basic page information
-            page_id = page["id"]
-            page_title = page["title"]
-            page_url = f"{self.wiki_base}{page['_links']['webui']}"
+        Takes in a confluence object, extracts all metadata, and converts it into a document.
+        If it's a page, it extracts the text and appends the comments to the document text.
+        If it's an attachment, it just downloads the attachment and converts that into a document.

-            # Get the page content
-            page_content = extract_text_from_confluence_html(
-                self.confluence_client, page, self._fetched_titles
+        parent_content_id: if the object is an attachment, specifies the content id that
+        the attachment is attached to
+        """
+        # The url and the id are the same
+        object_url = build_confluence_document_id(
+            self.wiki_base, confluence_object["_links"]["webui"], self.is_cloud
+        )
+
+        object_text = None
+        # Extract text from page
+        if confluence_object["type"] == "page":
+            object_text = extract_text_from_confluence_html(
+                confluence_client=self.confluence_client,
+                confluence_object=confluence_object,
+                fetched_titles={confluence_object.get("title", "")},
+            )
+            # Add comments to text
+            object_text += self._get_comment_string_for_page_id(confluence_object["id"])
+        elif confluence_object["type"] == "attachment":
+            object_text = attachment_to_content(
+                confluence_client=self.confluence_client,
+                attachment=confluence_object,
+                parent_content_id=parent_content_id,
            )

-            # Create the main section for the page content
-            sections = [Section(text=page_content, link=page_url)]
-
-            # Process comments if available
-            comment_text = self._get_comment_string_for_page_id(page_id)
-            if comment_text:
-                sections.append(Section(text=comment_text, link=f"{page_url}#comments"))
-
-            # Process attachments
-            if "children" in page and "attachment" in page["children"]:
-                attachments = self.confluence_client.get_attachments_for_page(
-                    page_id, expand="metadata"
-                )
-
-                for attachment in attachments.get("results", []):
-                    # Process each attachment
-                    result = process_attachment(
-                        self.confluence_client,
-                        attachment,
-                        page_title,
-                        self.image_analysis_llm,
-                    )
-
-                    if result.text:
-                        # Create a section for the attachment text
-                        attachment_section = Section(
-                            text=result.text,
-                            link=f"{page_url}#attachment-{attachment['id']}",
-                            image_file_name=result.file_name,
-                        )
-                        sections.append(attachment_section)
-                    elif result.error:
-                        logger.warning(
-                            f"Error processing attachment '{attachment.get('title')}': {result.error}"
-                        )
-
-            # Extract metadata
-            metadata = {}
-            if "space" in page:
-                metadata["space"] = page["space"].get("name", "")
-
-            # Extract labels
-            labels = []
-            if "metadata" in page and "labels" in page["metadata"]:
-                for label in page["metadata"]["labels"].get("results", []):
-                    labels.append(label.get("name", ""))
-            if labels:
-                metadata["labels"] = labels
-
-            # Extract owners
-            primary_owners = []
-            if "version" in page and "by" in page["version"]:
-                author = page["version"]["by"]
-                display_name = author.get("displayName", "Unknown")
-                primary_owners.append(BasicExpertInfo(display_name=display_name))
-
-            # Create the document
-            return Document(
-                id=build_confluence_document_id(
-                    self.wiki_base, page["_links"]["webui"], self.is_cloud
-                ),
-                sections=sections,
-                source=DocumentSource.CONFLUENCE,
-                semantic_identifier=page_title,
-                metadata=metadata,
-                doc_updated_at=datetime_from_string(page["version"]["when"]),
-                primary_owners=primary_owners if primary_owners else None,
-            )
-        except Exception as e:
-            logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
-            if not self.continue_on_failure:
-                raise
+        if object_text is None:
+            # This only happens for attachments that are not parseable
            return None

+        # Get space name
+        doc_metadata: dict[str, str | list[str]] = {
+            "Wiki Space Name": confluence_object["space"]["name"]
+        }
+
+        # Get labels
+        label_dicts = (
+            confluence_object.get("metadata", {}).get("labels", {}).get("results", [])
+        )
+        page_labels = [label.get("name") for label in label_dicts if label.get("name")]
+        if page_labels:
+            doc_metadata["labels"] = page_labels
+
+        # Get last modified and author email
+        version_dict = confluence_object.get("version", {})
+        last_modified = (
+            datetime_from_string(version_dict.get("when"))
+            if version_dict.get("when")
+            else None
+        )
+        author_email = version_dict.get("by", {}).get("email")
+
+        title = confluence_object.get("title", "Untitled Document")
+
+        return Document(
+            id=object_url,
+            sections=[Section(link=object_url, text=object_text)],
+            source=DocumentSource.CONFLUENCE,
+            semantic_identifier=title,
+            doc_updated_at=last_modified,
+            primary_owners=(
+                [BasicExpertInfo(email=author_email)] if author_email else None
+            ),
+            metadata=doc_metadata,
+        )
+
    def _fetch_document_batches(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> GenerateDocumentsOutput:
-        """
-        Yields batches of Documents. For each page:
-         - Create a Document with 1 Section for the page text/comments
-         - Then fetch attachments. For each attachment:
-             - Attempt to convert it with convert_attachment_to_content(...)
-             - If successful, create a new Section with the extracted text or summary.
-        """
        doc_batch: list[Document] = []
+        confluence_page_ids: list[str] = []

        page_query = self._construct_page_query(start, end)
        logger.debug(f"page_query: {page_query}")
-
+        # Fetch pages as Documents
        for page in self.confluence_client.paginated_cql_retrieval(
            cql=page_query,
            expand=",".join(_PAGE_EXPANSION_FIELDS),
            limit=self.batch_size,
        ):
-            # Build doc from page
-            doc = self._convert_page_to_document(page)
-            if not doc:
-                continue
-
-            # Now get attachments for that page:
-            attachment_query = self._construct_attachment_query(page["id"])
-            # We'll use the page's XML to provide context if we summarize an image
-            confluence_xml = page.get("body", {}).get("storage", {}).get("value", "")
+            logger.debug(f"_fetch_document_batches: {page['id']}")
+            confluence_page_ids.append(page["id"])
+            doc = self._convert_object_to_document(page)
+            if doc is not None:
+                doc_batch.append(doc)
+            if len(doc_batch) >= self.batch_size:
+                yield doc_batch
+                doc_batch = []

+        # Fetch attachments as Documents
+        for confluence_page_id in confluence_page_ids:
+            attachment_query = self._construct_attachment_query(confluence_page_id)
+            # TODO: maybe we should add a time filter here as well?
            for attachment in self.confluence_client.paginated_cql_retrieval(
                cql=attachment_query,
                expand=",".join(_ATTACHMENT_EXPANSION_FIELDS),
            ):
-                attachment["metadata"].get("mediaType", "")
-                if not validate_attachment_filetype(
-                    attachment, self.image_analysis_llm
-                ):
-                    continue
-
-                # Attempt to get textual content or image summarization:
-                try:
-                    logger.info(f"Processing attachment: {attachment['title']}")
-                    response = convert_attachment_to_content(
-                        confluence_client=self.confluence_client,
-                        attachment=attachment,
-                        page_context=confluence_xml,
-                        llm=self.image_analysis_llm,
-                    )
-                    if response is None:
-                        continue
-
-                    content_text, file_storage_name = response
-
-                    object_url = build_confluence_document_id(
-                        self.wiki_base, attachment["_links"]["webui"], self.is_cloud
-                    )
-
-                    if content_text:
-                        doc.sections.append(
-                            Section(
-                                text=content_text,
-                                link=object_url,
-                                image_file_name=file_storage_name,
-                            )
-                        )
-                except Exception as e:
-                    logger.error(
-                        f"Failed to extract/summarize attachment {attachment['title']}",
-                        exc_info=e,
-                    )
-                    if not self.continue_on_failure:
-                        raise
-
-            doc_batch.append(doc)
-
-            if len(doc_batch) >= self.batch_size:
-                yield doc_batch
-                doc_batch = []
+                doc = self._convert_object_to_document(attachment, confluence_page_id)
+                if doc is not None:
+                    doc_batch.append(doc)
+                if len(doc_batch) >= self.batch_size:
+                    yield doc_batch
+                    doc_batch = []

        if doc_batch:
            yield doc_batch
@@ -419,63 +359,55 @@ class ConfluenceConnector(
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
-        """
-        Return 'slim' docs (IDs + minimal permission data).
-        Does not fetch actual text. Used primarily for incremental permission sync.
-        """
        doc_metadata_list: list[SlimDocument] = []
+
        restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)

-        # Query pages
        page_query = self.base_cql_page_query + self.cql_label_filter
        for page in self.confluence_client.cql_paginate_all_expansions(
            cql=page_query,
            expand=restrictions_expand,
            limit=_SLIM_DOC_BATCH_SIZE,
        ):
+            # If the page has restrictions, add them to the perm_sync_data
+            # These will be used by doc_sync.py to sync permissions
            page_restrictions = page.get("restrictions")
            page_space_key = page.get("space", {}).get("key")
            page_ancestors = page.get("ancestors", [])
-
            page_perm_sync_data = {
                "restrictions": page_restrictions or {},
                "space_key": page_space_key,
-                "ancestors": page_ancestors,
+                "ancestors": page_ancestors or [],
            }

            doc_metadata_list.append(
                SlimDocument(
                    id=build_confluence_document_id(
-                        self.wiki_base, page["_links"]["webui"], self.is_cloud
+                        self.wiki_base,
+                        page["_links"]["webui"],
+                        self.is_cloud,
                    ),
                    perm_sync_data=page_perm_sync_data,
                )
            )
-
-            # Query attachments for each page
            attachment_query = self._construct_attachment_query(page["id"])
            for attachment in self.confluence_client.cql_paginate_all_expansions(
                cql=attachment_query,
                expand=restrictions_expand,
                limit=_SLIM_DOC_BATCH_SIZE,
            ):
-                # If you skip images, you'll skip them in the permission sync
-                attachment["metadata"].get("mediaType", "")
-                if not validate_attachment_filetype(
-                    attachment, self.image_analysis_llm
-                ):
+                if not validate_attachment_filetype(attachment):
                    continue
-
-                attachment_restrictions = attachment.get("restrictions", {})
+                attachment_restrictions = attachment.get("restrictions")
                if not attachment_restrictions:
-                    attachment_restrictions = page_restrictions or {}
+                    attachment_restrictions = page_restrictions

                attachment_space_key = attachment.get("space", {}).get("key")
                if not attachment_space_key:
                    attachment_space_key = page_space_key

                attachment_perm_sync_data = {
-                    "restrictions": attachment_restrictions,
+                    "restrictions": attachment_restrictions or {},
                    "space_key": attachment_space_key,
                }

@@ -489,16 +421,16 @@ class ConfluenceConnector(
                        perm_sync_data=attachment_perm_sync_data,
                    )
                )
-
            if len(doc_metadata_list) > _SLIM_DOC_BATCH_SIZE:
                yield doc_metadata_list[:_SLIM_DOC_BATCH_SIZE]
                doc_metadata_list = doc_metadata_list[_SLIM_DOC_BATCH_SIZE:]

-                if callback and callback.should_stop():
-                    raise RuntimeError(
-                        "retrieve_all_slim_documents: Stop signal detected"
-                    )
                if callback:
+                    if callback.should_stop():
+                        raise RuntimeError(
+                            "retrieve_all_slim_documents: Stop signal detected"
+                        )
+
                    callback.progress("retrieve_all_slim_documents", 1)

        yield doc_metadata_list
--- a/backend/onyx/connectors/confluence/onyx_confluence.py
+++ b/backend/onyx/connectors/confluence/onyx_confluence.py
@@ -144,12 +144,6 @@ class OnyxConfluence:
            self.static_credentials = credential_json
            return credential_json, False

-        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_ID:
-            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_ID must be set!")
-
-        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET:
-            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET must be set!")
-
        # check if we should refresh tokens. we're deciding to refresh halfway
        # to expiration
        now = datetime.now(timezone.utc)
--- a/backend/onyx/connectors/confluence/utils.py
+++ b/backend/onyx/connectors/confluence/utils.py
@@ -1,12 +1,9 @@
-import io
 import math
 import time
 from collections.abc import Callable
 from datetime import datetime
 from datetime import timedelta
 from datetime import timezone
-from io import BytesIO
-from pathlib import Path
 from typing import Any
 from typing import cast
 from typing import TYPE_CHECKING
@@ -15,28 +12,14 @@ from urllib.parse import parse_qs
 from urllib.parse import quote
 from urllib.parse import urlparse

+import bs4
 import requests
 from pydantic import BaseModel
-from sqlalchemy.orm import Session

-from onyx.configs.app_configs import (
-    CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD,
-)
-from onyx.configs.constants import FileOrigin
+from onyx.utils.logger import setup_logger

 if TYPE_CHECKING:
-    from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
-
-from onyx.db.engine import get_session_with_current_tenant
-from onyx.db.models import PGFileStore
-from onyx.db.pg_file_store import create_populate_lobj
-from onyx.db.pg_file_store import save_bytes_to_pgfilestore
-from onyx.db.pg_file_store import upsert_pgfilestore
-from onyx.file_processing.extract_file_text import extract_file_text
-from onyx.file_processing.file_validation import is_valid_image_type
-from onyx.file_processing.image_utils import store_image_and_create_section
-from onyx.llm.interfaces import LLM
-from onyx.utils.logger import setup_logger
+    pass

 logger = setup_logger()

@@ -52,229 +35,15 @@ class TokenResponse(BaseModel):
    scope: str


-def validate_attachment_filetype(
-    attachment: dict[str, Any], llm: LLM | None = None
-) -> bool:
-    """
-    Validates if the attachment is a supported file type.
-    If LLM is provided, also checks if it's an image that can be processed.
-    """
-    attachment.get("metadata", {})
-    media_type = attachment.get("metadata", {}).get("mediaType", "")
-
-    if media_type.startswith("image/"):
-        return llm is not None and is_valid_image_type(media_type)
-
-    # For non-image files, check if we support the extension
-    title = attachment.get("title", "")
-    extension = Path(title).suffix.lstrip(".").lower() if "." in title else ""
-    return extension in ["pdf", "doc", "docx", "txt", "md", "rtf"]
-
-
-class AttachmentProcessingResult(BaseModel):
-    """
-    A container for results after processing a Confluence attachment.
-    'text' is the textual content of the attachment.
-    'file_name' is the final file name used in PGFileStore to store the content.
-    'error' holds an exception or string if something failed.
-    """
-
-    text: str | None
-    file_name: str | None
-    error: str | None = None
-
-
-def _download_attachment(
-    confluence_client: "OnyxConfluence", attachment: dict[str, Any]
-) -> bytes | None:
-    """
-    Retrieves the raw bytes of an attachment from Confluence. Returns None on error.
-    """
-    download_link = confluence_client.url + attachment["_links"]["download"]
-    resp = confluence_client._session.get(download_link)
-    if resp.status_code != 200:
-        logger.warning(
-            f"Failed to fetch {download_link} with status code {resp.status_code}"
-        )
-        return None
-    return resp.content
-
-
-def process_attachment(
-    confluence_client: "OnyxConfluence",
-    attachment: dict[str, Any],
-    page_context: str,
-    llm: LLM | None,
-) -> AttachmentProcessingResult:
-    """
-    Processes a Confluence attachment. If it's a document, extracts text,
-    or if it's an image and an LLM is available, summarizes it. Returns a structured result.
-    """
-    try:
-        # Get the media type from the attachment metadata
-        media_type = attachment.get("metadata", {}).get("mediaType", "")
-
-        # Validate the attachment type
-        if not validate_attachment_filetype(attachment, llm):
-            return AttachmentProcessingResult(
-                text=None,
-                file_name=None,
-                error=f"Unsupported file type: {media_type}",
-            )
-
-        # Download the attachment
-        raw_bytes = _download_attachment(confluence_client, attachment)
-        if raw_bytes is None:
-            return AttachmentProcessingResult(
-                text=None, file_name=None, error="Failed to download attachment"
-            )
-
-        # Process image attachments with LLM if available
-        if media_type.startswith("image/") and llm:
-            return _process_image_attachment(
-                confluence_client, attachment, page_context, llm, raw_bytes, media_type
-            )
-
-        # Process document attachments
-        try:
-            text = extract_file_text(
-                file=BytesIO(raw_bytes),
-                file_name=attachment["title"],
-            )
-
-            # Skip if the text is too long
-            if len(text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
-                return AttachmentProcessingResult(
-                    text=None,
-                    file_name=None,
-                    error=f"Attachment text too long: {len(text)} chars",
-                )
-
-            return AttachmentProcessingResult(text=text, file_name=None, error=None)
-        except Exception as e:
-            return AttachmentProcessingResult(
-                text=None, file_name=None, error=f"Failed to extract text: {e}"
-            )
-
-    except Exception as e:
-        return AttachmentProcessingResult(
-            text=None, file_name=None, error=f"Failed to process attachment: {e}"
-        )
-
-
-def _process_image_attachment(
-    confluence_client: "OnyxConfluence",
-    attachment: dict[str, Any],
-    page_context: str,
-    llm: LLM,
-    raw_bytes: bytes,
-    media_type: str,
-) -> AttachmentProcessingResult:
-    """Process an image attachment by saving it and generating a summary."""
-    try:
-        # Use the standardized image storage and section creation
-        with get_session_with_current_tenant() as db_session:
-            section, file_name = store_image_and_create_section(
-                db_session=db_session,
-                image_data=raw_bytes,
-                file_name=Path(attachment["id"]).name,
-                display_name=attachment["title"],
-                media_type=media_type,
-                llm=llm,
-                file_origin=FileOrigin.CONNECTOR,
-            )
-
-            return AttachmentProcessingResult(
-                text=section.text, file_name=file_name, error=None
-            )
-    except Exception as e:
-        msg = f"Image summarization failed for {attachment['title']}: {e}"
-        logger.error(msg, exc_info=e)
-        return AttachmentProcessingResult(text=None, file_name=None, error=msg)
-
-
-def _process_text_attachment(
-    attachment: dict[str, Any],
-    raw_bytes: bytes,
-    media_type: str,
-) -> AttachmentProcessingResult:
-    """Process a text-based attachment by extracting its content."""
-    try:
-        extracted_text = extract_file_text(
-            io.BytesIO(raw_bytes),
-            file_name=attachment["title"],
-            break_on_unprocessable=False,
-        )
-    except Exception as e:
-        msg = f"Failed to extract text for '{attachment['title']}': {e}"
-        logger.error(msg, exc_info=e)
-        return AttachmentProcessingResult(text=None, file_name=None, error=msg)
-
-    # Check length constraints
-    if extracted_text is None or len(extracted_text) == 0:
-        msg = f"No text extracted for {attachment['title']}"
-        logger.warning(msg)
-        return AttachmentProcessingResult(text=None, file_name=None, error=msg)
-
-    if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
-        msg = (
-            f"Skipping attachment {attachment['title']} due to char count "
-            f"({len(extracted_text)} > {CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD})"
-        )
-        logger.warning(msg)
-        return AttachmentProcessingResult(text=None, file_name=None, error=msg)
-
-    # Save the attachment
-    try:
-        with get_session_with_current_tenant() as db_session:
-            saved_record = save_bytes_to_pgfilestore(
-                db_session=db_session,
-                raw_bytes=raw_bytes,
-                media_type=media_type,
-                identifier=attachment["id"],
-                display_name=attachment["title"],
-            )
-    except Exception as e:
-        msg = f"Failed to save attachment '{attachment['title']}' to PG: {e}"
-        logger.error(msg, exc_info=e)
-        return AttachmentProcessingResult(
-            text=extracted_text, file_name=None, error=msg
-        )
-
-    return AttachmentProcessingResult(
-        text=extracted_text, file_name=saved_record.file_name, error=None
-    )
-
-
-def convert_attachment_to_content(
-    confluence_client: "OnyxConfluence",
-    attachment: dict[str, Any],
-    page_context: str,
-    llm: LLM | None,
-) -> tuple[str | None, str | None] | None:
-    """
-    Facade function which:
-      1. Validates attachment type
-      2. Extracts or summarizes content
-      3. Returns (content_text, stored_file_name) or None if we should skip it
-    """
-    media_type = attachment["metadata"]["mediaType"]
-    # Quick check for unsupported types:
-    if media_type.startswith("video/") or media_type == "application/gliffy+json":
-        logger.warning(
-            f"Skipping unsupported attachment type: '{media_type}' for {attachment['title']}"
-        )
-        return None
-
-    result = process_attachment(confluence_client, attachment, page_context, llm)
-    if result.error is not None:
-        logger.warning(
-            f"Attachment {attachment['title']} encountered error: {result.error}"
-        )
-        return None
-
-    # Return the text and the file name
-    return result.text, result.file_name
+def validate_attachment_filetype(attachment: dict[str, Any]) -> bool:
+    return attachment["metadata"]["mediaType"] not in [
+        "image/jpeg",
+        "image/png",
+        "image/gif",
+        "image/svg+xml",
+        "video/mp4",
+        "video/quicktime",
+    ]


 def build_confluence_document_id(
@@ -295,6 +64,23 @@ def build_confluence_document_id(
    return f"{base_url}{content_url}"


+def _extract_referenced_attachment_names(page_text: str) -> list[str]:
+    """Parse a Confluence HTML page and list the filenames of the
+        attachments it references
+
+    Args:
+        page_text (str): The page content
+
+    Returns:
+        list[str]: List of filenames currently in use by the page text
+    """
+    referenced_attachment_filenames = []
+    soup = bs4.BeautifulSoup(page_text, "html.parser")
+    for attachment in soup.findAll("ri:attachment"):
+        referenced_attachment_filenames.append(attachment.attrs["ri:filename"])
+    return referenced_attachment_filenames
+
+
 def datetime_from_string(datetime_string: str) -> datetime:
    datetime_object = datetime.fromisoformat(datetime_string)

@@ -466,37 +252,3 @@ def update_param_in_path(path: str, param: str, value: str) -> str:
        + "?"
        + "&".join(f"{k}={quote(v[0])}" for k, v in query_params.items())
    )
-
-
-def attachment_to_file_record(
-    confluence_client: "OnyxConfluence",
-    attachment: dict[str, Any],
-    db_session: Session,
-) -> tuple[PGFileStore, bytes]:
-    """Save an attachment to the file store and return the file record."""
-    download_link = _attachment_to_download_link(confluence_client, attachment)
-    image_data = confluence_client.get(
-        download_link, absolute=True, not_json_response=True
-    )
-
-    # Save image to file store
-    file_name = f"confluence_attachment_{attachment['id']}"
-    lobj_oid = create_populate_lobj(BytesIO(image_data), db_session)
-    pgfilestore = upsert_pgfilestore(
-        file_name=file_name,
-        display_name=attachment["title"],
-        file_origin=FileOrigin.OTHER,
-        file_type=attachment["metadata"]["mediaType"],
-        lobj_oid=lobj_oid,
-        db_session=db_session,
-        commit=True,
-    )
-
-    return pgfilestore, image_data
-
-
-def _attachment_to_download_link(
-    confluence_client: "OnyxConfluence", attachment: dict[str, Any]
-) -> str:
-    """Extracts the download link to images."""
-    return confluence_client.url + attachment["_links"]["download"]
--- a/backend/onyx/connectors/file/connector.py
+++ b/backend/onyx/connectors/file/connector.py
@@ -10,23 +10,22 @@ from sqlalchemy.orm import Session

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
-from onyx.configs.constants import FileOrigin
 from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
 from onyx.connectors.interfaces import GenerateDocumentsOutput
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.models import BasicExpertInfo
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
-from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
 from onyx.db.engine import get_session_with_current_tenant
-from onyx.db.pg_file_store import get_pgfilestore_by_file_name
-from onyx.file_processing.extract_file_text import extract_text_and_images
+from onyx.file_processing.extract_file_text import detect_encoding
+from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_processing.extract_file_text import get_file_ext
+from onyx.file_processing.extract_file_text import is_text_file_extension
 from onyx.file_processing.extract_file_text import is_valid_file_ext
 from onyx.file_processing.extract_file_text import load_files_from_zip
-from onyx.file_processing.image_utils import store_image_and_create_section
+from onyx.file_processing.extract_file_text import read_pdf_file
+from onyx.file_processing.extract_file_text import read_text_file
 from onyx.file_store.file_store import get_default_file_store
-from onyx.llm.interfaces import LLM
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -36,115 +35,81 @@ def _read_files_and_metadata(
    file_name: str,
    db_session: Session,
 ) -> Iterator[tuple[str, IO, dict[str, Any]]]:
-    """
-    Reads the file from Postgres. If the file is a .zip, yields subfiles.
-    """
+    """Reads the file into IO. For a zip file, yields each file contained
+    within, together with its metadata dict if one is packaged in the zip."""
    extension = get_file_ext(file_name)
    metadata: dict[str, Any] = {}
    directory_path = os.path.dirname(file_name)

-    # Read file from Postgres store
    file_content = get_default_file_store(db_session).read_file(file_name, mode="b")

-    # If it's a zip, expand it
    if extension == ".zip":
-        for file_info, subfile, metadata in load_files_from_zip(
+        for file_info, file, metadata in load_files_from_zip(
            file_content, ignore_dirs=True
        ):
-            yield os.path.join(directory_path, file_info.filename), subfile, metadata
+            yield os.path.join(directory_path, file_info.filename), file, metadata
    elif is_valid_file_ext(extension):
        yield file_name, file_content, metadata
    else:
        logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")


-def _create_image_section(
-    llm: LLM | None,
-    image_data: bytes,
-    db_session: Session,
-    parent_file_name: str,
-    display_name: str,
-    idx: int = 0,
-) -> tuple[Section, str | None]:
-    """
-    Create a Section object for a single image and store the image in PGFileStore.
-    If summarization is enabled and we have an LLM, summarize the image.
-
-    Returns:
-        tuple: (Section object, file_name in PGFileStore or None if storage failed)
-    """
-    # Create a unique file name for the embedded image
-    file_name = f"{parent_file_name}_embedded_{idx}"
-
-    # Use the standardized utility to store the image and create a section
-    return store_image_and_create_section(
-        db_session=db_session,
-        image_data=image_data,
-        file_name=file_name,
-        display_name=display_name,
-        llm=llm,
-        file_origin=FileOrigin.OTHER,
-    )
-
-
 def _process_file(
    file_name: str,
    file: IO[Any],
-    metadata: dict[str, Any] | None,
-    pdf_pass: str | None,
-    db_session: Session,
-    llm: LLM | None,
+    metadata: dict[str, Any] | None = None,
+    pdf_pass: str | None = None,
 ) -> list[Document]:
-    """
-    Processes a single file, returning a list of Documents (typically one).
-    Also handles embedded images if 'EMBEDDED_IMAGE_EXTRACTION_ENABLED' is true.
-    """
    extension = get_file_ext(file_name)
-
-    # Fetch the DB record so we know the ID for internal URL
-    pg_record = get_pgfilestore_by_file_name(file_name=file_name, db_session=db_session)
-    if not pg_record:
-        logger.warning(f"No file record found for '{file_name}' in PG; skipping.")
-        return []
-
    if not is_valid_file_ext(extension):
-        logger.warning(
-            f"Skipping file '{file_name}' with unrecognized extension '{extension}'"
-        )
+        logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
        return []

-    # Prepare doc metadata
-    if metadata is None:
-        metadata = {}
-    file_display_name = metadata.get("file_display_name") or os.path.basename(file_name)
+    file_metadata: dict[str, Any] = {}

-    # Timestamps
-    current_datetime = datetime.now(timezone.utc)
-    time_updated = metadata.get("time_updated", current_datetime)
+    if is_text_file_extension(file_name):
+        encoding = detect_encoding(file)
+        file_content_raw, file_metadata = read_text_file(
+            file, encoding=encoding, ignore_onyx_metadata=False
+        )
+
+    # Using the PDF reader function directly to pass in password cleanly
+    elif extension == ".pdf" and pdf_pass is not None:
+        file_content_raw, file_metadata = read_pdf_file(file=file, pdf_pass=pdf_pass)
+
+    else:
+        file_content_raw = extract_file_text(
+            file=file,
+            file_name=file_name,
+            break_on_unprocessable=True,
+        )
+
+    all_metadata = {**metadata, **file_metadata} if metadata else file_metadata
+
+    # add a prefix to avoid conflicts with other connectors
+    doc_id = f"FILE_CONNECTOR__{file_name}"
+    if metadata:
+        doc_id = metadata.get("document_id") or doc_id
+
+    # If this is set, we will show this in the UI as the "name" of the file
+    file_display_name = all_metadata.get("file_display_name") or os.path.basename(
+        file_name
+    )
+    title = (
+        all_metadata["title"] or "" if "title" in all_metadata else file_display_name
+    )
+
+    time_updated = all_metadata.get("time_updated", datetime.now(timezone.utc))
    if isinstance(time_updated, str):
        time_updated = time_str_to_utc(time_updated)

-    dt_str = metadata.get("doc_updated_at")
+    dt_str = all_metadata.get("doc_updated_at")
    final_time_updated = time_str_to_utc(dt_str) if dt_str else time_updated

-    # Collect owners
-    p_owner_names = metadata.get("primary_owners")
-    s_owner_names = metadata.get("secondary_owners")
-    p_owners = (
-        [BasicExpertInfo(display_name=name) for name in p_owner_names]
-        if p_owner_names
-        else None
-    )
-    s_owners = (
-        [BasicExpertInfo(display_name=name) for name in s_owner_names]
-        if s_owner_names
-        else None
-    )
-
-    # Additional tags we store as doc metadata
+    # Metadata tags separate from the Onyx specific fields
    metadata_tags = {
        k: v
-        for k, v in metadata.items()
+        for k, v in all_metadata.items()
        if k
        not in [
            "document_id",
@@ -157,142 +122,77 @@ def _process_file(
            "file_display_name",
            "title",
            "connector_type",
-            "pdf_password",
        ]
    }

-    source_type_str = metadata.get("connector_type")
-    source_type = (
-        DocumentSource(source_type_str) if source_type_str else DocumentSource.FILE
+    source_type_str = all_metadata.get("connector_type")
+    source_type = DocumentSource(source_type_str) if source_type_str else None
+
+    p_owner_names = all_metadata.get("primary_owners")
+    s_owner_names = all_metadata.get("secondary_owners")
+    p_owners = (
+        [BasicExpertInfo(display_name=name) for name in p_owner_names]
+        if p_owner_names
+        else None
+    )
+    s_owners = (
+        [BasicExpertInfo(display_name=name) for name in s_owner_names]
+        if s_owner_names
+        else None
    )

-    doc_id = metadata.get("document_id") or f"FILE_CONNECTOR__{file_name}"
-    title = metadata.get("title") or file_display_name
-
-    # 1) If the file itself is an image, handle that scenario quickly
-    IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
-    if extension in IMAGE_EXTENSIONS:
-        # Summarize or produce empty doc
-        image_data = file.read()
-        image_section, _ = _create_image_section(
-            llm, image_data, db_session, pg_record.file_name, title
-        )
-        return [
-            Document(
-                id=doc_id,
-                sections=[image_section],
-                source=source_type,
-                semantic_identifier=file_display_name,
-                title=title,
-                doc_updated_at=final_time_updated,
-                primary_owners=p_owners,
-                secondary_owners=s_owners,
-                metadata=metadata_tags,
-            )
-        ]
-
-    # 2) Otherwise: text-based approach. Possibly with embedded images if enabled.
-    #    (For example .docx with inline images).
-    file.seek(0)
-    text_content = ""
-    embedded_images: list[tuple[bytes, str]] = []
-
-    text_content, embedded_images = extract_text_and_images(
-        file=file,
-        file_name=file_name,
-        pdf_pass=pdf_pass,
-    )
-
-    # Build sections: first the text as a single Section
-    sections = []
-    link_in_meta = metadata.get("link")
-    if text_content.strip():
-        sections.append(Section(link=link_in_meta, text=text_content.strip()))
-
-    # Then any extracted images from docx, etc.
-    for idx, (img_data, img_name) in enumerate(embedded_images, start=1):
-        # Store each embedded image as a separate file in PGFileStore
-        # and create a section with the image summary
-        image_section, _ = _create_image_section(
-            llm,
-            img_data,
-            db_session,
-            pg_record.file_name,
-            f"{title} - image {idx}",
-            idx,
-        )
-        sections.append(image_section)
    return [
        Document(
            id=doc_id,
-            sections=sections,
-            source=source_type,
+            sections=[
+                Section(link=all_metadata.get("link"), text=file_content_raw.strip())
+            ],
+            source=source_type or DocumentSource.FILE,
            semantic_identifier=file_display_name,
            title=title,
            doc_updated_at=final_time_updated,
            primary_owners=p_owners,
            secondary_owners=s_owners,
+            # Currently metadata only houses tags; owners and updated-at have dedicated fields.
            metadata=metadata_tags,
        )
    ]


-class LocalFileConnector(LoadConnector, VisionEnabledConnector):
-    """
-    Connector that reads files from Postgres and yields Documents, including
-    optional embedded image extraction.
-    """
-
+class LocalFileConnector(LoadConnector):
    def __init__(
        self,
        file_locations: list[Path | str],
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
-        self.file_locations = [str(loc) for loc in file_locations]
+        self.file_locations = [Path(file_location) for file_location in file_locations]
        self.batch_size = batch_size
        self.pdf_pass: str | None = None

-        # Initialize vision LLM using the mixin
-        self.initialize_vision_llm()
-
    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.pdf_pass = credentials.get("pdf_password")
-
        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
-        """
-        Iterates over each file path, fetches from Postgres, tries to parse text
-        or images, and yields Document batches.
-        """
        documents: list[Document] = []

        with get_session_with_current_tenant() as db_session:
            for file_path in self.file_locations:
                current_datetime = datetime.now(timezone.utc)
-
-                files_iter = _read_files_and_metadata(
-                    file_name=file_path,
-                    db_session=db_session,
+                files = _read_files_and_metadata(
+                    file_name=str(file_path), db_session=db_session
                )

-                for actual_file_name, file, metadata in files_iter:
+                for file_name, file, metadata in files:
                    metadata["time_updated"] = metadata.get(
                        "time_updated", current_datetime
                    )
-                    new_docs = _process_file(
-                        file_name=actual_file_name,
-                        file=file,
-                        metadata=metadata,
-                        pdf_pass=self.pdf_pass,
-                        db_session=db_session,
-                        llm=self.image_analysis_llm,
+                    documents.extend(
+                        _process_file(file_name, file, metadata, self.pdf_pass)
                    )
-                    documents.extend(new_docs)

                    if len(documents) >= self.batch_size:
                        yield documents
-
                        documents = []

            if documents:
@@ -301,7 +201,7 @@ class LocalFileConnector(LoadConnector, VisionEnabledConnector):

 if __name__ == "__main__":
    connector = LocalFileConnector(file_locations=[os.environ["TEST_FILE"]])
-    connector.load_credentials({"pdf_password": os.environ.get("PDF_PASSWORD")})
-    doc_batches = connector.load_from_state()
-    for batch in doc_batches:
-        print("BATCH:", batch)
+    connector.load_credentials({"pdf_password": os.environ["PDF_PASSWORD"]})
+
+    document_batches = connector.load_from_state()
+    print(next(document_batches))
--- a/backend/onyx/connectors/github/connector.py
+++ b/backend/onyx/connectors/github/connector.py
@@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        repo_owner: str,
-        repositories: str | None = None,
+        repo_name: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        state_filter: str = "all",
        include_prs: bool = True,
        include_issues: bool = False,
    ) -> None:
        self.repo_owner = repo_owner
-        self.repositories = repositories
+        self.repo_name = repo_name
        self.batch_size = batch_size
        self.state_filter = state_filter
        self.include_prs = include_prs
@@ -157,42 +157,11 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
-            return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
+            return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
        except RateLimitExceededException:
            _sleep_after_rate_limit_exception(github_client)
            return self._get_github_repo(github_client, attempt_num + 1)

-    def _get_github_repos(
-        self, github_client: Github, attempt_num: int = 0
-    ) -> list[Repository.Repository]:
-        """Get specific repositories based on comma-separated repo_name string."""
-        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
-            raise RuntimeError(
-                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
-            )
-
-        try:
-            repos = []
-            # Split repo_name by comma and strip whitespace
-            repo_names = [
-                name.strip() for name in (cast(str, self.repositories)).split(",")
-            ]
-
-            for repo_name in repo_names:
-                if repo_name:  # Skip empty strings
-                    try:
-                        repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
-                        repos.append(repo)
-                    except GithubException as e:
-                        logger.warning(
-                            f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
-                        )
-
-            return repos
-        except RateLimitExceededException:
-            _sleep_after_rate_limit_exception(github_client)
-            return self._get_github_repos(github_client, attempt_num + 1)
-
    def _get_all_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
@@ -220,17 +189,11 @@ class GithubConnector(LoadConnector, PollConnector):
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub")

-        repos = []
-        if self.repositories:
-            if "," in self.repositories:
-                # Multiple repositories specified
-                repos = self._get_github_repos(self.github_client)
-            else:
-                # Single repository (backward compatibility)
-                repos = [self._get_github_repo(self.github_client)]
-        else:
-            # All repositories
-            repos = self._get_all_repos(self.github_client)
+        repos = (
+            [self._get_github_repo(self.github_client)]
+            if self.repo_name
+            else self._get_all_repos(self.github_client)
+        )

        for repo in repos:
            if self.include_prs:
@@ -305,48 +268,11 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
-            if self.repositories:
-                if "," in self.repositories:
-                    # Multiple repositories specified
-                    repo_names = [name.strip() for name in self.repositories.split(",")]
-                    if not repo_names:
-                        raise ConnectorValidationError(
-                            "Invalid connector settings: No valid repository names provided."
-                        )
-
-                    # Validate at least one repository exists and is accessible
-                    valid_repos = False
-                    validation_errors = []
-
-                    for repo_name in repo_names:
-                        if not repo_name:
-                            continue
-
-                        try:
-                            test_repo = self.github_client.get_repo(
-                                f"{self.repo_owner}/{repo_name}"
-                            )
-                            test_repo.get_contents("")
-                            valid_repos = True
-                            # If at least one repo is valid, we can proceed
-                            break
-                        except GithubException as e:
-                            validation_errors.append(
-                                f"Repository '{repo_name}': {e.data.get('message', str(e))}"
-                            )
-
-                    if not valid_repos:
-                        error_msg = (
-                            "None of the specified repositories could be accessed: "
-                        )
-                        error_msg += ", ".join(validation_errors)
-                        raise ConnectorValidationError(error_msg)
-                else:
-                    # Single repository (backward compatibility)
-                    test_repo = self.github_client.get_repo(
-                        f"{self.repo_owner}/{self.repositories}"
-                    )
-                    test_repo.get_contents("")
+            if self.repo_name:
+                test_repo = self.github_client.get_repo(
+                    f"{self.repo_owner}/{self.repo_name}"
+                )
+                test_repo.get_contents("")
            else:
                # Try to get organization first
                try:
@@ -372,15 +298,10 @@ class GithubConnector(LoadConnector, PollConnector):
                    "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
                )
            elif e.status == 404:
-                if self.repositories:
-                    if "," in self.repositories:
-                        raise ConnectorValidationError(
-                            f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
-                        )
-                    else:
-                        raise ConnectorValidationError(
-                            f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
-                        )
+                if self.repo_name:
+                    raise ConnectorValidationError(
+                        f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
+                    )
                else:
                    raise ConnectorValidationError(
                        f"GitHub user or organization not found: {self.repo_owner}"
@@ -389,7 +310,6 @@ class GithubConnector(LoadConnector, PollConnector):
                raise ConnectorValidationError(
                    f"Unexpected GitHub error (status={e.status}): {e.data}"
                )
-
        except Exception as exc:
            raise Exception(
                f"Unexpected error during GitHub settings validation: {exc}"
@@ -401,7 +321,7 @@ if __name__ == "__main__":

    connector = GithubConnector(
        repo_owner=os.environ["REPO_OWNER"],
-        repositories=os.environ["REPOSITORIES"],
+        repo_name=os.environ["REPO_NAME"],
    )
    connector.load_credentials(
        {"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
--- a/backend/onyx/connectors/google_drive/connector.py
+++ b/backend/onyx/connectors/google_drive/connector.py
@@ -4,12 +4,14 @@ from concurrent.futures import as_completed
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from typing import Any
+from typing import cast

 from google.oauth2.credentials import Credentials as OAuthCredentials  # type: ignore
 from google.oauth2.service_account import Credentials as ServiceAccountCredentials  # type: ignore
 from googleapiclient.errors import HttpError  # type: ignore

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
+from onyx.configs.app_configs import MAX_FILE_SIZE_BYTES
 from onyx.configs.constants import DocumentSource
 from onyx.connectors.exceptions import ConnectorValidationError
 from onyx.connectors.exceptions import CredentialExpiredError
@@ -34,6 +36,7 @@ from onyx.connectors.google_utils.shared_constants import (
 )
 from onyx.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR
 from onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS
+from onyx.connectors.google_utils.shared_constants import SCOPE_DOC_URL
 from onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
 from onyx.connectors.google_utils.shared_constants import USER_FIELDS
 from onyx.connectors.interfaces import GenerateDocumentsOutput
@@ -43,9 +46,7 @@ from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
 from onyx.connectors.interfaces import SlimConnector
 from onyx.connectors.models import ConnectorMissingCredentialError
-from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
-from onyx.llm.interfaces import LLM
 from onyx.utils.logger import setup_logger
 from onyx.utils.retry_wrapper import retry_builder

@@ -65,10 +66,7 @@ def _extract_ids_from_urls(urls: list[str]) -> list[str]:


 def _convert_single_file(
-    creds: Any,
-    primary_admin_email: str,
-    file: dict[str, Any],
-    image_analysis_llm: LLM | None,
+    creds: Any, primary_admin_email: str, file: dict[str, Any]
 ) -> Any:
    user_email = file.get("owners", [{}])[0].get("emailAddress") or primary_admin_email
    user_drive_service = get_drive_service(creds, user_email=user_email)
@@ -77,14 +75,11 @@ def _convert_single_file(
        file=file,
        drive_service=user_drive_service,
        docs_service=docs_service,
-        image_analysis_llm=image_analysis_llm,  # pass the LLM so doc_conversion can summarize images
    )


 def _process_files_batch(
-    files: list[GoogleDriveFileType],
-    convert_func: Callable[[GoogleDriveFileType], Any],
-    batch_size: int,
+    files: list[GoogleDriveFileType], convert_func: Callable, batch_size: int
 ) -> GenerateDocumentsOutput:
    doc_batch = []
    with ThreadPoolExecutor(max_workers=min(16, len(files))) as executor:
@@ -116,9 +111,7 @@ def _clean_requested_drive_ids(
    return valid_requested_drive_ids, filtered_folder_ids


-class GoogleDriveConnector(
-    LoadConnector, PollConnector, SlimConnector, VisionEnabledConnector
-):
+class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
    def __init__(
        self,
        include_shared_drives: bool = False,
@@ -136,23 +129,23 @@ class GoogleDriveConnector(
        continue_on_failure: bool | None = None,
    ) -> None:
        # Check for old input parameters
-        if folder_paths is not None:
-            logger.warning(
-                "The 'folder_paths' parameter is deprecated. Use 'shared_folder_urls' instead."
+        if (
+            folder_paths is not None
+            or include_shared is not None
+            or follow_shortcuts is not None
+            or only_org_public is not None
+            or continue_on_failure is not None
+        ):
+            logger.exception(
+                "Google Drive connector received old input parameters. "
+                "Please visit the docs for help with the new setup: "
+                f"{SCOPE_DOC_URL}"
            )
-        if include_shared is not None:
-            logger.warning(
-                "The 'include_shared' parameter is deprecated. Use 'include_files_shared_with_me' instead."
+            raise ConnectorValidationError(
+                "Google Drive connector received old input parameters. "
+                "Please visit the docs for help with the new setup: "
+                f"{SCOPE_DOC_URL}"
            )
-        if follow_shortcuts is not None:
-            logger.warning("The 'follow_shortcuts' parameter is deprecated.")
-        if only_org_public is not None:
-            logger.warning("The 'only_org_public' parameter is deprecated.")
-        if continue_on_failure is not None:
-            logger.warning("The 'continue_on_failure' parameter is deprecated.")
-
-        # Initialize vision LLM using the mixin
-        self.initialize_vision_llm()

        if (
            not include_shared_drives
@@ -244,7 +237,6 @@ class GoogleDriveConnector(
            credentials=credentials,
            source=DocumentSource.GOOGLE_DRIVE,
        )
-
        return new_creds_dict

    def _update_traversed_parent_ids(self, folder_id: str) -> None:
@@ -316,9 +308,7 @@ class GoogleDriveConnector(
        # validate that the user has access to the drive APIs by performing a simple
        # request and checking for a 401
        try:
-            # default is ~17mins of retries, don't do that here for cases so we don't
-            # waste 17mins everytime we run into a user without access to drive APIs
-            retry_builder(tries=3, delay=1)(get_root_folder_id)(drive_service)
+            retry_builder()(get_root_folder_id)(drive_service)
        except HttpError as e:
            if e.status_code == 401:
                # fail gracefully, let the other impersonations continue
@@ -533,53 +523,37 @@ class GoogleDriveConnector(
        end: SecondsSinceUnixEpoch | None = None,
    ) -> GenerateDocumentsOutput:
        # Create a larger process pool for file conversion
-        with ThreadPoolExecutor(max_workers=8) as executor:
-            # Prepare a partial function with the credentials and admin email
-            convert_func = partial(
-                _convert_single_file,
-                self.creds,
-                self.primary_admin_email,
-                image_analysis_llm=self.image_analysis_llm,  # Use the mixin's LLM
+        convert_func = partial(
+            _convert_single_file, self.creds, self.primary_admin_email
+        )
+
+        # Process files in larger batches
+        LARGE_BATCH_SIZE = self.batch_size * 4
+        files_to_process = []
+        # Gather the files into batches to be processed in parallel
+        for file in self._fetch_drive_items(is_slim=False, start=start, end=end):
+            if (
+                file.get("size")
+                and int(cast(str, file.get("size"))) > MAX_FILE_SIZE_BYTES
+            ):
+                logger.warning(
+                    f"Skipping file {file.get('name', 'Unknown')} as it is too large: {file.get('size')} bytes"
+                )
+                continue
+
+            files_to_process.append(file)
+            if len(files_to_process) >= LARGE_BATCH_SIZE:
+                yield from _process_files_batch(
+                    files_to_process, convert_func, self.batch_size
+                )
+                files_to_process = []
+
+        # Process any remaining files
+        if files_to_process:
+            yield from _process_files_batch(
+                files_to_process, convert_func, self.batch_size
            )

-            # Fetch files in batches
-            files_batch: list[GoogleDriveFileType] = []
-            for file in self._fetch_drive_items(is_slim=False, start=start, end=end):
-                files_batch.append(file)
-
-                if len(files_batch) >= self.batch_size:
-                    # Process the batch
-                    futures = [
-                        executor.submit(convert_func, file) for file in files_batch
-                    ]
-                    documents = []
-                    for future in as_completed(futures):
-                        try:
-                            doc = future.result()
-                            if doc is not None:
-                                documents.append(doc)
-                        except Exception as e:
-                            logger.error(f"Error converting file: {e}")
-
-                    if documents:
-                        yield documents
-                    files_batch = []
-
-            # Process any remaining files
-            if files_batch:
-                futures = [executor.submit(convert_func, file) for file in files_batch]
-                documents = []
-                for future in as_completed(futures):
-                    try:
-                        doc = future.result()
-                        if doc is not None:
-                            documents.append(doc)
-                    except Exception as e:
-                        logger.error(f"Error converting file: {e}")
-
-                if documents:
-                    yield documents
-
    def load_from_state(self) -> GenerateDocumentsOutput:
        try:
            yield from self._extract_docs_from_google_drive()
--- a/backend/onyx/connectors/google_drive/doc_conversion.py
+++ b/backend/onyx/connectors/google_drive/doc_conversion.py
@@ -9,7 +9,7 @@ from googleapiclient.errors import HttpError  # type: ignore

 from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
 from onyx.configs.constants import DocumentSource
-from onyx.configs.constants import FileOrigin
+from onyx.configs.constants import IGNORE_FOR_QA
 from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
 from onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
 from onyx.connectors.google_drive.constants import UNSUPPORTED_FILE_TYPE_CONTENT
@@ -21,88 +21,32 @@ from onyx.connectors.google_utils.resources import GoogleDriveService
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
 from onyx.connectors.models import SlimDocument
-from onyx.db.engine import get_session_with_current_tenant
-from onyx.file_processing.extract_file_text import docx_to_text_and_images
+from onyx.file_processing.extract_file_text import docx_to_text
 from onyx.file_processing.extract_file_text import pptx_to_text
 from onyx.file_processing.extract_file_text import read_pdf_file
-from onyx.file_processing.file_validation import is_valid_image_type
-from onyx.file_processing.image_summarization import summarize_image_with_error_handling
-from onyx.file_processing.image_utils import store_image_and_create_section
 from onyx.file_processing.unstructured import get_unstructured_api_key
 from onyx.file_processing.unstructured import unstructured_to_text
-from onyx.llm.interfaces import LLM
 from onyx.utils.logger import setup_logger

 logger = setup_logger()


-def _summarize_drive_image(
-    image_data: bytes, image_name: str, image_analysis_llm: LLM | None
-) -> str:
-    """
-    Summarize the given image using the provided LLM.
-    """
-    if not image_analysis_llm:
-        return ""
-
-    return (
-        summarize_image_with_error_handling(
-            llm=image_analysis_llm,
-            image_data=image_data,
-            context_name=image_name,
-        )
-        or ""
-    )
-
-
-def is_gdrive_image_mime_type(mime_type: str) -> bool:
-    """
-    Return True if the mime_type is a common image type in GDrive.
-    (e.g. 'image/png', 'image/jpeg')
-    """
-    return is_valid_image_type(mime_type)
+# these errors don't represent a failure in the connector, but simply files
+# that can't / shouldn't be indexed
+ERRORS_TO_CONTINUE_ON = [
+    "cannotExportFile",
+    "exportSizeLimitExceeded",
+    "cannotDownloadFile",
+]


 def _extract_sections_basic(
-    file: dict[str, str],
-    service: GoogleDriveService,
-    image_analysis_llm: LLM | None = None,
+    file: dict[str, str], service: GoogleDriveService
 ) -> list[Section]:
-    """
-    Extends the existing logic to handle either a docx with embedded images
-    or standalone images (PNG, JPG, etc).
-    """
    mime_type = file["mimeType"]
    link = file["webViewLink"]
-    file_name = file.get("name", file["id"])
    supported_file_types = set(item.value for item in GDriveMimeType)

-    # 1) If the file is an image, retrieve the raw bytes, optionally summarize
-    if is_gdrive_image_mime_type(mime_type):
-        try:
-            response = service.files().get_media(fileId=file["id"]).execute()
-
-            with get_session_with_current_tenant() as db_session:
-                section, _ = store_image_and_create_section(
-                    db_session=db_session,
-                    image_data=response,
-                    file_name=file["id"],
-                    display_name=file_name,
-                    media_type=mime_type,
-                    llm=image_analysis_llm,
-                    file_origin=FileOrigin.CONNECTOR,
-                )
-                return [section]
-        except Exception as e:
-            logger.warning(f"Failed to fetch or summarize image: {e}")
-            return [
-                Section(
-                    link=link,
-                    text="",
-                    image_file_name=link,
-                )
-            ]
-
    if mime_type not in supported_file_types:
        # Unsupported file types can still have a title, finding this way is still useful
        return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
@@ -241,63 +185,45 @@ def _extract_sections_basic(
            GDriveMimeType.PLAIN_TEXT.value,
            GDriveMimeType.MARKDOWN.value,
        ]:
-            text_data = (
-                service.files().get_media(fileId=file["id"]).execute().decode("utf-8")
-            )
-            return [Section(link=link, text=text_data)]
-
+            return [
+                Section(
+                    link=link,
+                    text=service.files()
+                    .get_media(fileId=file["id"])
+                    .execute()
+                    .decode("utf-8"),
+                )
+            ]
        # ---------------------------
        # Word, PowerPoint, PDF files
-        elif mime_type in [
+        if mime_type in [
            GDriveMimeType.WORD_DOC.value,
            GDriveMimeType.POWERPOINT.value,
            GDriveMimeType.PDF.value,
        ]:
-            response_bytes = service.files().get_media(fileId=file["id"]).execute()
-
-            # Optionally use Unstructured
+            response = service.files().get_media(fileId=file["id"]).execute()
            if get_unstructured_api_key():
-                text = unstructured_to_text(
-                    file=io.BytesIO(response_bytes),
-                    file_name=file_name,
-                )
-                return [Section(link=link, text=text)]
+                return [
+                    Section(
+                        link=link,
+                        text=unstructured_to_text(
+                            file=io.BytesIO(response),
+                            file_name=file.get("name", file["id"]),
+                        ),
+                    )
+                ]

            if mime_type == GDriveMimeType.WORD_DOC.value:
-                # Use docx_to_text_and_images to get text plus embedded images
-                text, embedded_images = docx_to_text_and_images(
-                    file=io.BytesIO(response_bytes),
-                )
-                sections = []
-                if text.strip():
-                    sections.append(Section(link=link, text=text.strip()))
-
-                # Process each embedded image using the standardized function
-                with get_session_with_current_tenant() as db_session:
-                    for idx, (img_data, img_name) in enumerate(
-                        embedded_images, start=1
-                    ):
-                        # Create a unique identifier for the embedded image
-                        embedded_id = f"{file['id']}_embedded_{idx}"
-
-                        section, _ = store_image_and_create_section(
-                            db_session=db_session,
-                            image_data=img_data,
-                            file_name=embedded_id,
-                            display_name=img_name or f"{file_name} - image {idx}",
-                            llm=image_analysis_llm,
-                            file_origin=FileOrigin.CONNECTOR,
-                        )
-                        sections.append(section)
-                return sections
-
+                return [
+                    Section(link=link, text=docx_to_text(file=io.BytesIO(response)))
+                ]
            elif mime_type == GDriveMimeType.PDF.value:
-                text, _pdf_meta, images = read_pdf_file(io.BytesIO(response_bytes))
+                text, _ = read_pdf_file(file=io.BytesIO(response))
                return [Section(link=link, text=text)]
-
            elif mime_type == GDriveMimeType.POWERPOINT.value:
-                text_data = pptx_to_text(io.BytesIO(response_bytes))
-                return [Section(link=link, text=text_data)]
+                return [
+                    Section(link=link, text=pptx_to_text(file=io.BytesIO(response)))
+                ]

        # Catch-all case, should not happen since there should be specific handling
        # for each of the supported file types
@@ -305,8 +231,7 @@ def _extract_sections_basic(
        logger.error(error_message)
        raise ValueError(error_message)

-    except Exception as e:
-        logger.exception(f"Error extracting sections from file: {e}")
+    except Exception:
        return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]


@@ -314,62 +239,74 @@ def convert_drive_item_to_document(
    file: GoogleDriveFileType,
    drive_service: GoogleDriveService,
    docs_service: GoogleDocsService,
-    image_analysis_llm: LLM | None,
 ) -> Document | None:
-    """
-    Main entry point for converting a Google Drive file => Document object.
-    Now we accept an optional `llm` to pass to `_extract_sections_basic`.
-    """
    try:
-        # skip shortcuts or folders
-        if file.get("mimeType") in [DRIVE_SHORTCUT_TYPE, DRIVE_FOLDER_TYPE]:
-            logger.info("Skipping shortcut/folder.")
+        # Skip files that are shortcuts
+        if file.get("mimeType") == DRIVE_SHORTCUT_TYPE:
+            logger.info("Ignoring Drive Shortcut Filetype")
+            return None
+        # Skip files that are folders
+        if file.get("mimeType") == DRIVE_FOLDER_TYPE:
+            logger.info("Ignoring Drive Folder Filetype")
            return None

-        # If it's a Google Doc, we might do advanced parsing
        sections: list[Section] = []
+
+        # Special handling for Google Docs to preserve structure, link
+        # to headers
        if file.get("mimeType") == GDriveMimeType.DOC.value:
            try:
-                # get_document_sections is the advanced approach for Google Docs
                sections = get_document_sections(docs_service, file["id"])
            except Exception as e:
                logger.warning(
-                    f"Failed to pull google doc sections from '{file['name']}': {e}. "
-                    "Falling back to basic extraction."
+                    f"Ran into exception '{e}' when pulling sections from Google Doc '{file['name']}'."
+                    " Falling back to basic extraction."
                )
-
-        # If not a doc, or if we failed above, do our 'basic' approach
+        # NOTE: this will run for either (1) the above failed or (2) the file is not a Google Doc
        if not sections:
-            sections = _extract_sections_basic(file, drive_service, image_analysis_llm)
+            try:
+                # For all other file types just extract the text
+                sections = _extract_sections_basic(file, drive_service)

+            except HttpError as e:
+                reason = e.error_details[0]["reason"] if e.error_details else e.reason
+                message = e.error_details[0]["message"] if e.error_details else e.reason
+                if e.status_code == 403 and reason in ERRORS_TO_CONTINUE_ON:
+                    logger.warning(
+                        f"Could not export file '{file['name']}' due to '{message}', skipping..."
+                    )
+                    return None
+
+                raise
        if not sections:
            return None

-        doc_id = file["webViewLink"]
-        updated_time = datetime.fromisoformat(file["modifiedTime"]).astimezone(
-            timezone.utc
-        )
-
        return Document(
-            id=doc_id,
+            id=file["webViewLink"],
            sections=sections,
            source=DocumentSource.GOOGLE_DRIVE,
            semantic_identifier=file["name"],
-            doc_updated_at=updated_time,
-            metadata={},  # or any metadata from 'file'
+            doc_updated_at=datetime.fromisoformat(file["modifiedTime"]).astimezone(
+                timezone.utc
+            ),
+            metadata={}
+            if any(section.text for section in sections)
+            else {IGNORE_FOR_QA: "True"},
            additional_info=file.get("id"),
        )
-
    except Exception as e:
-        logger.exception(f"Error converting file '{file.get('name')}' to Document: {e}")
        if not CONTINUE_ON_CONNECTOR_FAILURE:
-            raise
+            raise e
+
+        logger.exception("Ran into exception when pulling a file from Google Drive")
    return None


 def build_slim_document(file: GoogleDriveFileType) -> SlimDocument | None:
+    # Skip files that are folders or shortcuts
    if file.get("mimeType") in [DRIVE_FOLDER_TYPE, DRIVE_SHORTCUT_TYPE]:
        return None
+
    return SlimDocument(
        id=file["webViewLink"],
        perm_sync_data={
--- a/backend/onyx/connectors/models.py
+++ b/backend/onyx/connectors/models.py
@@ -28,8 +28,7 @@ class ConnectorMissingCredentialError(PermissionError):

 class Section(BaseModel):
    text: str
-    link: str | None = None
-    image_file_name: str | None = None
+    link: str | None


 class BasicExpertInfo(BaseModel):
--- a/backend/onyx/connectors/vision_enabled_connector.py
+++ b/backend/onyx/connectors/vision_enabled_connector.py
@@ -1,45 +0,0 @@
-"""
-Mixin for connectors that need vision capabilities.
-"""
-from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
-from onyx.llm.factory import get_default_llm_with_vision
-from onyx.llm.interfaces import LLM
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-class VisionEnabledConnector:
-    """
-    Mixin for connectors that need vision capabilities.
-
-    This mixin provides a standard way to initialize a vision-capable LLM
-    for image analysis during indexing.
-
-    Usage:
-        class MyConnector(LoadConnector, VisionEnabledConnector):
-            def __init__(self, ...):
-                super().__init__(...)
-                self.initialize_vision_llm()
-    """
-
-    def initialize_vision_llm(self) -> None:
-        """
-        Initialize a vision-capable LLM if enabled by configuration.
-
-        Sets self.image_analysis_llm to the LLM instance or None if disabled.
-        """
-        self.image_analysis_llm: LLM | None = None
-        if get_image_extraction_and_analysis_enabled():
-            try:
-                self.image_analysis_llm = get_default_llm_with_vision()
-                if self.image_analysis_llm is None:
-                    logger.warning(
-                        "No LLM with vision found; image summarization will be disabled"
-                    )
-            except Exception as e:
-                logger.warning(
-                    f"Failed to initialize vision LLM due to an error: {str(e)}. "
-                    "Image summarization will be disabled."
-                )
-                self.image_analysis_llm = None
--- a/backend/onyx/connectors/web/connector.py
+++ b/backend/onyx/connectors/web/connector.py
@@ -157,7 +157,6 @@ def get_internal_links(

 def start_playwright() -> Tuple[Playwright, BrowserContext]:
    playwright = sync_playwright().start()
-
    browser = playwright.chromium.launch(headless=True)

    context = browser.new_context()
@@ -333,7 +332,7 @@ class WebConnector(LoadConnector):
                if initial_url.split(".")[-1] == "pdf":
                    # PDF files are not checked for links
                    response = requests.get(initial_url)
-                    page_text, metadata, images = read_pdf_file(
+                    page_text, metadata = read_pdf_file(
                        file=io.BytesIO(response.content)
                    )
                    last_modified = response.headers.get("Last-Modified")
--- a/backend/onyx/context/search/models.py
+++ b/backend/onyx/context/search/models.py
@@ -16,7 +16,7 @@ from onyx.db.models import SearchSettings
 from onyx.indexing.models import BaseChunk
 from onyx.indexing.models import IndexingSetting
 from shared_configs.enums import RerankerProvider
-from shared_configs.model_server_models import Embedding
+

 MAX_METRICS_CONTENT = (
    200  # Just need enough characters to identify where in the doc the chunk is
@@ -151,10 +151,6 @@ class SearchRequest(ChunkContext):
    evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED
    model_config = ConfigDict(arbitrary_types_allowed=True)

-    precomputed_query_embedding: Embedding | None = None
-    precomputed_is_keyword: bool | None = None
-    precomputed_keywords: list[str] | None = None
-

 class SearchQuery(ChunkContext):
    "Processed Request that is directly passed to the SearchPipeline"
@@ -179,8 +175,6 @@ class SearchQuery(ChunkContext):
    offset: int = 0
    model_config = ConfigDict(frozen=True)

-    precomputed_query_embedding: Embedding | None = None
-

 class RetrievalDetails(ChunkContext):
    # Use LLM to determine whether to do a retrieval or only rely on existing history
--- a/backend/onyx/context/search/pipeline.py
+++ b/backend/onyx/context/search/pipeline.py
@@ -331,14 +331,6 @@ class SearchPipeline:
        self._retrieved_sections = expanded_inference_sections
        return expanded_inference_sections

-    @property
-    def retrieved_sections(self) -> list[InferenceSection]:
-        if self._retrieved_sections is not None:
-            return self._retrieved_sections
-
-        self._retrieved_sections = self._get_sections()
-        return self._retrieved_sections
-
    @property
    def reranked_sections(self) -> list[InferenceSection]:
        """Reranking is always done at the chunk level since section merging could create arbitrarily
@@ -351,7 +343,7 @@ class SearchPipeline:
        if self._reranked_sections is not None:
            return self._reranked_sections

-        retrieved_sections = self.retrieved_sections
+        retrieved_sections = self._get_sections()
        if self.retrieved_sections_callback is not None:
            self.retrieved_sections_callback(retrieved_sections)

--- a/backend/onyx/context/search/postprocessing/postprocessing.py
+++ b/backend/onyx/context/search/postprocessing/postprocessing.py
@@ -1,17 +1,12 @@
-import base64
 from collections.abc import Callable
 from collections.abc import Iterator
 from typing import cast

 import numpy
-from langchain_core.messages import BaseMessage
-from langchain_core.messages import HumanMessage
-from langchain_core.messages import SystemMessage

 from onyx.chat.models import SectionRelevancePiece
 from onyx.configs.app_configs import BLURB_SIZE
 from onyx.configs.constants import RETURN_SEPARATOR
-from onyx.configs.llm_configs import get_search_time_image_analysis_enabled
 from onyx.configs.model_configs import CROSS_ENCODER_RANGE_MAX
 from onyx.configs.model_configs import CROSS_ENCODER_RANGE_MIN
 from onyx.context.search.enums import LLMEvaluationType
@@ -23,15 +18,11 @@ from onyx.context.search.models import MAX_METRICS_CONTENT
 from onyx.context.search.models import RerankingDetails
 from onyx.context.search.models import RerankMetricsContainer
 from onyx.context.search.models import SearchQuery
-from onyx.db.engine import get_session_with_current_tenant
 from onyx.document_index.document_index_utils import (
    translate_boost_count_to_multiplier,
 )
-from onyx.file_store.file_store import get_default_file_store
 from onyx.llm.interfaces import LLM
-from onyx.llm.utils import message_to_string
 from onyx.natural_language_processing.search_nlp_models import RerankingModel
-from onyx.prompts.image_analysis import IMAGE_ANALYSIS_SYSTEM_PROMPT
 from onyx.secondary_llm_flows.chunk_usefulness import llm_batch_eval_sections
 from onyx.utils.logger import setup_logger
 from onyx.utils.threadpool_concurrency import FunctionCall
@@ -39,124 +30,6 @@ from onyx.utils.threadpool_concurrency import run_functions_in_parallel
 from onyx.utils.timing import log_function_time


-def update_image_sections_with_query(
-    sections: list[InferenceSection],
-    query: str,
-    llm: LLM,
-) -> None:
-    """
-    For each chunk in each section that has an image URL, call an LLM to produce
-    a new 'content' string that directly addresses the user's query about that image.
-    This implementation uses parallel processing for efficiency.
-    """
-    logger = setup_logger()
-    logger.debug(f"Starting image section update with query: {query}")
-
-    chunks_with_images = []
-    for section in sections:
-        for chunk in section.chunks:
-            if chunk.image_file_name:
-                chunks_with_images.append(chunk)
-
-    if not chunks_with_images:
-        logger.debug("No images to process in the sections")
-        return  # No images to process
-
-    logger.info(f"Found {len(chunks_with_images)} chunks with images to process")
-
-    def process_image_chunk(chunk: InferenceChunk) -> tuple[str, str]:
-        try:
-            logger.debug(
-                f"Processing image chunk with ID: {chunk.unique_id}, image: {chunk.image_file_name}"
-            )
-            with get_session_with_current_tenant() as db_session:
-                file_record = get_default_file_store(db_session).read_file(
-                    cast(str, chunk.image_file_name), mode="b"
-                )
-                if not file_record:
-                    logger.error(f"Image file not found: {chunk.image_file_name}")
-                    raise Exception("File not found")
-                file_content = file_record.read()
-                image_base64 = base64.b64encode(file_content).decode()
-                logger.debug(
-                    f"Successfully loaded image data for {chunk.image_file_name}"
-                )
-
-            messages: list[BaseMessage] = [
-                SystemMessage(content=IMAGE_ANALYSIS_SYSTEM_PROMPT),
-                HumanMessage(
-                    content=[
-                        {
-                            "type": "text",
-                            "text": (
-                                f"The user's question is: '{query}'. "
-                                "Please analyze the following image in that context:\n"
-                            ),
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{image_base64}",
-                            },
-                        },
-                    ]
-                ),
-            ]
-
-            raw_response = llm.invoke(messages)
-
-            answer_text = message_to_string(raw_response).strip()
-            return (
-                chunk.unique_id,
-                answer_text if answer_text else "No relevant info found.",
-            )
-
-        except Exception:
-            logger.exception(
-                f"Error updating image section with query source image url: {chunk.image_file_name}"
-            )
-            return chunk.unique_id, "Error analyzing image."
-
-    image_processing_tasks = [
-        FunctionCall(process_image_chunk, (chunk,)) for chunk in chunks_with_images
-    ]
-
-    logger.info(
-        f"Starting parallel processing of {len(image_processing_tasks)} image tasks"
-    )
-    image_processing_results = run_functions_in_parallel(image_processing_tasks)
-    logger.info(
-        f"Completed parallel processing with {len(image_processing_results)} results"
-    )
-
-    # Create a mapping of chunk IDs to their processed content
-    chunk_id_to_content = {}
-    success_count = 0
-    for task_id, result in image_processing_results.items():
-        if result:
-            chunk_id, content = result
-            chunk_id_to_content[chunk_id] = content
-            success_count += 1
-        else:
-            logger.error(f"Task {task_id} failed to return a valid result")
-
-    logger.info(
-        f"Successfully processed {success_count}/{len(image_processing_results)} images"
-    )
-
-    # Update the chunks with the processed content
-    updated_count = 0
-    for section in sections:
-        for chunk in section.chunks:
-            if chunk.unique_id in chunk_id_to_content:
-                chunk.content = chunk_id_to_content[chunk.unique_id]
-                updated_count += 1
-
-    logger.info(
-        f"Updated content for {updated_count} chunks with image analysis results"
-    )
-
-
 logger = setup_logger()


@@ -413,10 +286,6 @@ def search_postprocessing(
        # NOTE: if we don't rerank, we can return the chunks immediately
        # since we know this is the final order.
        # This way the user experience isn't delayed by the LLM step
-        if get_search_time_image_analysis_enabled():
-            update_image_sections_with_query(
-                retrieved_sections, search_query.query, llm
-            )
        _log_top_section_links(search_query.search_type.value, retrieved_sections)
        yield retrieved_sections
        sections_yielded = True
@@ -454,13 +323,6 @@ def search_postprocessing(
            )
        else:
            _log_top_section_links(search_query.search_type.value, reranked_sections)
-
-            # Add the image processing step here
-            if get_search_time_image_analysis_enabled():
-                update_image_sections_with_query(
-                    reranked_sections, search_query.query, llm
-                )
-
            yield reranked_sections

    llm_selected_section_ids = (
--- a/backend/onyx/context/search/preprocessing/preprocessing.py
+++ b/backend/onyx/context/search/preprocessing/preprocessing.py
@@ -117,12 +117,8 @@ def retrieval_preprocessing(
        else None
    )

-    # Sometimes this is pre-computed in parallel with other heavy tasks to improve
-    # latency, and in that case we don't need to run the model again
    run_query_analysis = (
-        None
-        if (skip_query_analysis or search_request.precomputed_is_keyword is not None)
-        else FunctionCall(query_analysis, (query,), {})
+        None if skip_query_analysis else FunctionCall(query_analysis, (query,), {})
    )

    functions_to_run = [
@@ -147,12 +143,11 @@ def retrieval_preprocessing(

    # The extracted keywords right now are not very reliable, not using for now
    # Can maybe use for highlighting
-    is_keyword, _extracted_keywords = False, None
-    if search_request.precomputed_is_keyword is not None:
-        is_keyword = search_request.precomputed_is_keyword
-        _extracted_keywords = search_request.precomputed_keywords
-    elif run_query_analysis:
-        is_keyword, _extracted_keywords = parallel_results[run_query_analysis.result_id]
+    is_keyword, extracted_keywords = (
+        parallel_results[run_query_analysis.result_id]
+        if run_query_analysis
+        else (False, None)
+    )

    all_query_terms = query.split()
    processed_keywords = (
@@ -252,5 +247,4 @@ def retrieval_preprocessing(
        chunks_above=chunks_above,
        chunks_below=chunks_below,
        full_doc=search_request.full_doc,
-        precomputed_query_embedding=search_request.precomputed_query_embedding,
    )
--- a/backend/onyx/context/search/retrieval/search_runner.py
+++ b/backend/onyx/context/search/retrieval/search_runner.py
@@ -31,7 +31,7 @@ from onyx.utils.timing import log_function_time
 from shared_configs.configs import MODEL_SERVER_HOST
 from shared_configs.configs import MODEL_SERVER_PORT
 from shared_configs.enums import EmbedTextType
-from shared_configs.model_server_models import Embedding
+

 logger = setup_logger()

@@ -109,20 +109,6 @@ def combine_retrieval_results(
    return sorted_chunks


-def get_query_embedding(query: str, db_session: Session) -> Embedding:
-    search_settings = get_current_search_settings(db_session)
-
-    model = EmbeddingModel.from_db_model(
-        search_settings=search_settings,
-        # The below are globally set, this flow always uses the indexing one
-        server_host=MODEL_SERVER_HOST,
-        server_port=MODEL_SERVER_PORT,
-    )
-
-    query_embedding = model.encode([query], text_type=EmbedTextType.QUERY)[0]
-    return query_embedding
-
-
@log_function_time(print_only=True)
 def doc_index_retrieval(
    query: SearchQuery,
@@ -135,10 +121,17 @@ def doc_index_retrieval(
    from the large chunks to the referenced chunks,
    dedupes the chunks, and cleans the chunks.
    """
-    query_embedding = query.precomputed_query_embedding or get_query_embedding(
-        query.query, db_session
+    search_settings = get_current_search_settings(db_session)
+
+    model = EmbeddingModel.from_db_model(
+        search_settings=search_settings,
+        # The below are globally set, this flow always uses the indexing one
+        server_host=MODEL_SERVER_HOST,
+        server_port=MODEL_SERVER_PORT,
    )

+    query_embedding = model.encode([query.query], text_type=EmbedTextType.QUERY)[0]
+
    top_chunks = document_index.hybrid_retrieval(
        query=query.query,
        query_embedding=query_embedding,
@@ -256,16 +249,7 @@ def retrieve_chunks(
                continue
            simplified_queries.add(simplified_rephrase)

-            q_copy = query.model_copy(
-                update={
-                    "query": rephrase,
-                    # need to recompute for each rephrase
-                    # note that `SearchQuery` is a frozen model, so we can't update
-                    # it below
-                    "precomputed_query_embedding": None,
-                },
-                deep=True,
-            )
+            q_copy = query.copy(update={"query": rephrase}, deep=True)
            run_queries.append(
                (
                    doc_index_retrieval,
--- a/backend/onyx/db/pg_file_store.py
+++ b/backend/onyx/db/pg_file_store.py
@@ -148,28 +148,3 @@ def upsert_pgfilestore(
        db_session.commit()

    return pgfilestore
-
-
-def save_bytes_to_pgfilestore(
-    db_session: Session,
-    raw_bytes: bytes,
-    media_type: str,
-    identifier: str,
-    display_name: str,
-    file_origin: FileOrigin = FileOrigin.OTHER,
-) -> PGFileStore:
-    """
-    Saves raw bytes to PGFileStore and returns the resulting record.
-    """
-    file_name = f"{file_origin.name.lower()}_{identifier}"
-    lobj_oid = create_populate_lobj(BytesIO(raw_bytes), db_session)
-    pgfilestore = upsert_pgfilestore(
-        file_name=file_name,
-        display_name=display_name,
-        file_origin=file_origin,
-        file_type=media_type,
-        lobj_oid=lobj_oid,
-        db_session=db_session,
-        commit=True,
-    )
-    return pgfilestore
--- a/backend/onyx/db/seeding/chat_history_seeding.py
+++ b/backend/onyx/db/seeding/chat_history_seeding.py
@@ -1,79 +0,0 @@
-import random
-from datetime import datetime
-from datetime import timedelta
-from logging import getLogger
-
-from onyx.configs.constants import MessageType
-from onyx.db.chat import create_chat_session
-from onyx.db.chat import create_new_chat_message
-from onyx.db.chat import get_or_create_root_message
-from onyx.db.engine import get_session_with_current_tenant
-from onyx.db.models import ChatSession
-
-logger = getLogger(__name__)
-
-
-def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
-    """Utility function to seed chat history for testing.
-
-    num_sessions: the number of sessions to seed
-    num_messages: the number of messages to seed per sessions
-    days: the number of days looking backwards from the current time over which to randomize
-    the times.
-    """
-    with get_session_with_current_tenant() as db_session:
-        logger.info(f"Seeding {num_sessions} sessions.")
-        for y in range(0, num_sessions):
-            create_chat_session(db_session, f"pytest_session_{y}", None, None)
-
-        # randomize all session times
-        logger.info(f"Seeding {num_messages} messages per session.")
-        rows = db_session.query(ChatSession).all()
-        for x in range(0, len(rows)):
-            if x % 1024 == 0:
-                logger.info(f"Seeded messages for {x} sessions so far.")
-
-            row = rows[x]
-            row.time_created = datetime.utcnow() - timedelta(
-                days=random.randint(0, days)
-            )
-            row.time_updated = row.time_created + timedelta(
-                minutes=random.randint(0, 10)
-            )
-
-            root_message = get_or_create_root_message(row.id, db_session)
-
-            current_message_type = MessageType.USER
-            parent_message = root_message
-            for x in range(0, num_messages):
-                if current_message_type == MessageType.USER:
-                    msg = f"pytest_message_user_{x}"
-                else:
-                    msg = f"pytest_message_assistant_{x}"
-
-                chat_message = create_new_chat_message(
-                    row.id,
-                    parent_message,
-                    msg,
-                    None,
-                    0,
-                    current_message_type,
-                    db_session,
-                )
-
-                chat_message.time_sent = row.time_created + timedelta(
-                    minutes=random.randint(0, 10)
-                )
-
-                db_session.commit()
-
-                current_message_type = (
-                    MessageType.ASSISTANT
-                    if current_message_type == MessageType.USER
-                    else MessageType.USER
-                )
-                parent_message = chat_message
-
-        db_session.commit()
-
-        logger.info(f"Seeded messages for {len(rows)} sessions. Finished.")
--- a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
+++ b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
@@ -55,9 +55,6 @@ schema DANSWER_CHUNK_NAME {
        field blurb type string {
            indexing: summary | attribute
        }
-        field image_file_name type string {
-            indexing: summary | attribute
-        }
        # https://docs.vespa.ai/en/attributes.html potential enum store for speed, but probably not worth it
        field source_type type string {
            indexing: summary | attribute
--- a/backend/onyx/document_index/vespa/chunk_retrieval.py
+++ b/backend/onyx/document_index/vespa/chunk_retrieval.py
@@ -31,7 +31,6 @@ from onyx.document_index.vespa_constants import DOC_UPDATED_AT
 from onyx.document_index.vespa_constants import DOCUMENT_ID
 from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
 from onyx.document_index.vespa_constants import HIDDEN
-from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
 from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
 from onyx.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE
 from onyx.document_index.vespa_constants import MAX_OR_CONDITIONS
@@ -131,7 +130,6 @@ def _vespa_hit_to_inference_chunk(
        section_continuation=fields[SECTION_CONTINUATION],
        document_id=fields[DOCUMENT_ID],
        source_type=fields[SOURCE_TYPE],
-        image_file_name=fields.get(IMAGE_FILE_NAME),
        title=fields.get(TITLE),
        semantic_identifier=fields[SEMANTIC_IDENTIFIER],
        boost=fields.get(BOOST, 1),
@@ -213,7 +211,6 @@ def _get_chunks_via_visit_api(

        # Check if the response contains any documents
        response_data = response.json()
-
        if "documents" in response_data:
            for document in response_data["documents"]:
                if filters.access_control_list:
--- a/backend/onyx/document_index/vespa/indexing_utils.py
+++ b/backend/onyx/document_index/vespa/indexing_utils.py
@@ -32,7 +32,6 @@ from onyx.document_index.vespa_constants import DOCUMENT_ID
 from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
 from onyx.document_index.vespa_constants import DOCUMENT_SETS
 from onyx.document_index.vespa_constants import EMBEDDINGS
-from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
 from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
 from onyx.document_index.vespa_constants import METADATA
 from onyx.document_index.vespa_constants import METADATA_LIST
@@ -199,13 +198,13 @@ def _index_vespa_chunk(
        # which only calls VespaIndex.update
        ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()},
        DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},
-        IMAGE_FILE_NAME: chunk.image_file_name,
        BOOST: chunk.boost,
    }

    if multitenant:
        if chunk.tenant_id:
            vespa_document_fields[TENANT_ID] = chunk.tenant_id
+
    vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}"
    logger.debug(f'Indexing to URL "{vespa_url}"')
    res = http_client.post(
--- a/backend/onyx/document_index/vespa_constants.py
+++ b/backend/onyx/document_index/vespa_constants.py
@@ -77,7 +77,6 @@ PRIMARY_OWNERS = "primary_owners"
 SECONDARY_OWNERS = "secondary_owners"
 RECENCY_BIAS = "recency_bias"
 HIDDEN = "hidden"
-IMAGE_FILE_NAME = "image_file_name"

 # Specific to Vespa, needed for highlighting matching keywords / section
 CONTENT_SUMMARY = "content_summary"
@@ -95,7 +94,6 @@ YQL_BASE = (
    f"{SEMANTIC_IDENTIFIER}, "
    f"{TITLE}, "
    f"{SECTION_CONTINUATION}, "
-    f"{IMAGE_FILE_NAME}, "
    f"{BOOST}, "
    f"{HIDDEN}, "
    f"{DOC_UPDATED_AT}, "
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -9,17 +9,15 @@ from email.parser import Parser as EmailParser
 from io import BytesIO
 from pathlib import Path
 from typing import Any
+from typing import Dict
 from typing import IO
-from typing import List
-from typing import Tuple

 import chardet
 import docx  # type: ignore
 import openpyxl  # type: ignore
 import pptx  # type: ignore
-from docx import Document as DocxDocument
+from docx import Document
 from fastapi import UploadFile
-from PIL import Image
 from pypdf import PdfReader
 from pypdf.errors import PdfStreamError

@@ -33,8 +31,10 @@ from onyx.utils.logger import setup_logger

 logger = setup_logger()

+
 TEXT_SECTION_SEPARATOR = "\n\n"

+
 PLAIN_TEXT_FILE_EXTENSIONS = [
    ".txt",
    ".md",
@@ -49,6 +49,7 @@ PLAIN_TEXT_FILE_EXTENSIONS = [
    ".yaml",
 ]

+
 VALID_FILE_EXTENSIONS = PLAIN_TEXT_FILE_EXTENSIONS + [
    ".pdf",
    ".docx",
@@ -57,16 +58,6 @@ VALID_FILE_EXTENSIONS = PLAIN_TEXT_FILE_EXTENSIONS + [
    ".eml",
    ".epub",
    ".html",
-    ".png",
-    ".jpg",
-    ".jpeg",
-    ".webp",
-]
-
-IMAGE_MEDIA_TYPES = [
-    "image/png",
-    "image/jpeg",
-    "image/webp",
 ]


@@ -76,13 +67,11 @@ def is_text_file_extension(file_name: str) -> bool:

 def get_file_ext(file_path_or_name: str | Path) -> str:
    _, extension = os.path.splitext(file_path_or_name)
+    # standardize all extensions to be lowercase so that checks against
+    # VALID_FILE_EXTENSIONS and similar will work as intended
    return extension.lower()


-def is_valid_media_type(media_type: str) -> bool:
-    return media_type in IMAGE_MEDIA_TYPES
-
-
 def is_valid_file_ext(ext: str) -> bool:
    return ext in VALID_FILE_EXTENSIONS

@@ -90,18 +79,17 @@ def is_valid_file_ext(ext: str) -> bool:
 def is_text_file(file: IO[bytes]) -> bool:
    """
    checks if the first 1024 bytes only contain printable or whitespace characters
-    if it does, then we say it's a plaintext file
+    if it does, then we say its a plaintext file
    """
    raw_data = file.read(1024)
-    file.seek(0)
    text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
    return all(c in text_chars for c in raw_data)


 def detect_encoding(file: IO[bytes]) -> str:
    raw_data = file.read(50000)
-    file.seek(0)
    encoding = chardet.detect(raw_data)["encoding"] or "utf-8"
+    file.seek(0)
    return encoding


@@ -111,14 +99,14 @@ def is_macos_resource_fork_file(file_name: str) -> bool:
    )


+# To include additional metadata in the search index, add a .onyx_metadata.json file
+# to the zip file. This file should contain a list of objects with the following format:
+# [{ "filename": "file1.txt", "link": "https://example.com/file1.txt" }]
 def load_files_from_zip(
    zip_file_io: IO,
    ignore_macos_resource_fork_files: bool = True,
    ignore_dirs: bool = True,
 ) -> Iterator[tuple[zipfile.ZipInfo, IO[Any], dict[str, Any]]]:
-    """
-    If there's a .onyx_metadata.json in the zip, attach those metadata to each subfile.
-    """
    with zipfile.ZipFile(zip_file_io, "r") as zip_file:
        zip_metadata = {}
        try:
@@ -130,31 +118,24 @@ def load_files_from_zip(
                        # convert list of dicts to dict of dicts
                        zip_metadata = {d["filename"]: d for d in zip_metadata}
                except json.JSONDecodeError:
-                    logger.warning(f"Unable to load {DANSWER_METADATA_FILENAME}")
+                    logger.warn(f"Unable to load {DANSWER_METADATA_FILENAME}")
        except KeyError:
            logger.info(f"No {DANSWER_METADATA_FILENAME} file")

        for file_info in zip_file.infolist():
-            if ignore_dirs and file_info.is_dir():
-                continue
+            with zip_file.open(file_info.filename, "r") as file:
+                if ignore_dirs and file_info.is_dir():
+                    continue

-            if (
-                ignore_macos_resource_fork_files
-                and is_macos_resource_fork_file(file_info.filename)
-            ) or file_info.filename == DANSWER_METADATA_FILENAME:
-                continue
-
-            with zip_file.open(file_info.filename, "r") as subfile:
-                yield file_info, subfile, zip_metadata.get(file_info.filename, {})
+                if (
+                    ignore_macos_resource_fork_files
+                    and is_macos_resource_fork_file(file_info.filename)
+                ) or file_info.filename == DANSWER_METADATA_FILENAME:
+                    continue
+                yield file_info, file, zip_metadata.get(file_info.filename, {})


 def _extract_onyx_metadata(line: str) -> dict | None:
-    """
-    Example: first line has:
-        <!-- DANSWER_METADATA={"title": "..."} -->
-      or
-        #DANSWER_METADATA={"title":"..."}
-    """
    html_comment_pattern = r"<!--\s*DANSWER_METADATA=\{(.*?)\}\s*-->"
    hashtag_pattern = r"#DANSWER_METADATA=\{(.*?)\}"

@@ -180,13 +161,9 @@ def read_text_file(
    errors: str = "replace",
    ignore_onyx_metadata: bool = True,
 ) -> tuple[str, dict]:
-    """
-    For plain text files. Optionally extracts Onyx metadata from the first line.
-    """
    metadata = {}
    file_content_raw = ""
    for ind, line in enumerate(file):
-        # decode
        try:
            line = line.decode(encoding) if isinstance(line, bytes) else line
        except UnicodeDecodeError:
@@ -196,132 +173,131 @@ def read_text_file(
                else line
            )

-        # optionally parse metadata in the first line
-        if ind == 0 and not ignore_onyx_metadata:
-            potential_meta = _extract_onyx_metadata(line)
-            if potential_meta is not None:
-                metadata = potential_meta
-                continue
-
-        file_content_raw += line
+        if ind == 0:
+            metadata_or_none = (
+                None if ignore_onyx_metadata else _extract_onyx_metadata(line)
+            )
+            if metadata_or_none is not None:
+                metadata = metadata_or_none
+            else:
+                file_content_raw += line
+        else:
+            file_content_raw += line

    return file_content_raw, metadata


 def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
-    """
-    Extract text from a PDF. For embedded images, a more complex approach is needed.
-    This is a minimal approach returning text only.
-    """
-    text, _, _ = read_pdf_file(file, pdf_pass)
+    """Extract text from a PDF file."""
+    # Return only the extracted text from read_pdf_file
+    text, _ = read_pdf_file(file, pdf_pass)
    return text


 def read_pdf_file(
-    file: IO[Any], pdf_pass: str | None = None, extract_images: bool = False
-) -> tuple[str, dict, list[tuple[bytes, str]]]:
-    """
-    Returns the text, basic PDF metadata, and optionally extracted images.
-    """
-    metadata: dict[str, Any] = {}
-    extracted_images: list[tuple[bytes, str]] = []
+    file: IO[Any],
+    pdf_pass: str | None = None,
+) -> tuple[str, dict]:
+    metadata: Dict[str, Any] = {}
    try:
        pdf_reader = PdfReader(file)

+        # If marked as encrypted and a password is provided, try to decrypt
        if pdf_reader.is_encrypted and pdf_pass is not None:
            decrypt_success = False
-            try:
-                decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
-            except Exception:
-                logger.error("Unable to decrypt pdf")
+            if pdf_pass is not None:
+                try:
+                    decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
+                except Exception:
+                    logger.error("Unable to decrypt pdf")

            if not decrypt_success:
-                return "", metadata, []
+                # By user request, keep files that are unreadable just so they
+                # can be discoverable by title.
+                return "", metadata
        elif pdf_reader.is_encrypted:
-            logger.warning("No Password for an encrypted PDF, returning empty text.")
-            return "", metadata, []
+            logger.warning("No Password available to decrypt pdf, returning empty")
+            return "", metadata

-        # Basic PDF metadata
+        # Extract metadata from the PDF, removing leading '/' from keys if present
+        # This standardizes the metadata keys for consistency
+        metadata = {}
        if pdf_reader.metadata is not None:
            for key, value in pdf_reader.metadata.items():
                clean_key = key.lstrip("/")
                if isinstance(value, str) and value.strip():
                    metadata[clean_key] = value
+
                elif isinstance(value, list) and all(
                    isinstance(item, str) for item in value
                ):
                    metadata[clean_key] = ", ".join(value)

-        text = TEXT_SECTION_SEPARATOR.join(
-            page.extract_text() for page in pdf_reader.pages
+        return (
+            TEXT_SECTION_SEPARATOR.join(
+                page.extract_text() for page in pdf_reader.pages
+            ),
+            metadata,
        )
-
-        if extract_images:
-            for page_num, page in enumerate(pdf_reader.pages):
-                for image_file_object in page.images:
-                    image = Image.open(io.BytesIO(image_file_object.data))
-                    img_byte_arr = io.BytesIO()
-                    image.save(img_byte_arr, format=image.format)
-                    img_bytes = img_byte_arr.getvalue()
-
-                    image_name = (
-                        f"page_{page_num + 1}_image_{image_file_object.name}."
-                        f"{image.format.lower() if image.format else 'png'}"
-                    )
-                    extracted_images.append((img_bytes, image_name))
-
-        return text, metadata, extracted_images
-
    except PdfStreamError:
-        logger.exception("Invalid PDF file")
+        logger.exception("PDF file is not a valid PDF")
    except Exception:
        logger.exception("Failed to read PDF")

-    return "", metadata, []
+    # File is still discoverable by title
+    # but the contents are not included as they cannot be parsed
+    return "", metadata


-def docx_to_text_and_images(
-    file: IO[Any],
-) -> Tuple[str, List[Tuple[bytes, str]]]:
-    """
-    Extract text from a docx. If embed_images=True, also extract inline images.
-    Return (text_content, list_of_images).
-    """
+def docx_to_text(file: IO[Any]) -> str:
+    def is_simple_table(table: docx.table.Table) -> bool:
+        for row in table.rows:
+            # No omitted cells
+            if row.grid_cols_before > 0 or row.grid_cols_after > 0:
+                return False
+
+            # No nested tables
+            if any(cell.tables for cell in row.cells):
+                return False
+
+        return True
+
+    def extract_cell_text(cell: docx.table._Cell) -> str:
+        cell_paragraphs = [para.text.strip() for para in cell.paragraphs]
+        return " ".join(p for p in cell_paragraphs if p) or "N/A"
+
    paragraphs = []
-    embedded_images: List[Tuple[bytes, str]] = []
-
    doc = docx.Document(file)
+    for item in doc.iter_inner_content():
+        if isinstance(item, docx.text.paragraph.Paragraph):
+            paragraphs.append(item.text)

-    # Grab text from paragraphs
-    for paragraph in doc.paragraphs:
-        paragraphs.append(paragraph.text)
+        elif isinstance(item, docx.table.Table):
+            if not item.rows or not is_simple_table(item):
+                continue

-    # Reset position so we can re-load the doc (python-docx has read the stream)
-    # Note: if python-docx has fully consumed the stream, you may need to open it again from memory.
-    # For large docs, a more robust approach is needed.
-    # This is a simplified example.
+            # Every row is a new line, joined with a single newline
+            table_content = "\n".join(
+                [
+                    ",\t".join(extract_cell_text(cell) for cell in row.cells)
+                    for row in item.rows
+                ]
+            )
+            paragraphs.append(table_content)

-    for rel_id, rel in doc.part.rels.items():
-        if "image" in rel.reltype:
-            # image is typically in rel.target_part.blob
-            image_bytes = rel.target_part.blob
-            image_name = rel.target_part.partname
-            # store
-            embedded_images.append((image_bytes, os.path.basename(str(image_name))))
-
-    text_content = "\n".join(paragraphs)
-    return text_content, embedded_images
+    # Docx already has good spacing between paragraphs
+    return "\n".join(paragraphs)


 def pptx_to_text(file: IO[Any]) -> str:
    presentation = pptx.Presentation(file)
    text_content = []
    for slide_number, slide in enumerate(presentation.slides, start=1):
-        slide_text = f"\nSlide {slide_number}:\n"
+        extracted_text = f"\nSlide {slide_number}:\n"
        for shape in slide.shapes:
            if hasattr(shape, "text"):
-                slide_text += shape.text + "\n"
-        text_content.append(slide_text)
+                extracted_text += shape.text + "\n"
+        text_content.append(extracted_text)
    return TEXT_SECTION_SEPARATOR.join(text_content)


@@ -329,21 +305,18 @@ def xlsx_to_text(file: IO[Any]) -> str:
    workbook = openpyxl.load_workbook(file, read_only=True)
    text_content = []
    for sheet in workbook.worksheets:
-        rows = []
-        for row in sheet.iter_rows(min_row=1, values_only=True):
-            row_str = ",".join(str(cell) if cell is not None else "" for cell in row)
-            rows.append(row_str)
-        sheet_str = "\n".join(rows)
-        text_content.append(sheet_str)
+        sheet_string = "\n".join(
+            ",".join(map(str, row))
+            for row in sheet.iter_rows(min_row=1, values_only=True)
+        )
+        text_content.append(sheet_string)
    return TEXT_SECTION_SEPARATOR.join(text_content)


 def eml_to_text(file: IO[Any]) -> str:
-    encoding = detect_encoding(file)
-    text_file = io.TextIOWrapper(file, encoding=encoding)
+    text_file = io.TextIOWrapper(file, encoding=detect_encoding(file))
    parser = EmailParser()
    message = parser.parse(text_file)
-
    text_content = []
    for part in message.walk():
        if part.get_content_type().startswith("text/plain"):
@@ -369,8 +342,8 @@ def epub_to_text(file: IO[Any]) -> str:

 def file_io_to_text(file: IO[Any]) -> str:
    encoding = detect_encoding(file)
-    file_content, _ = read_text_file(file, encoding=encoding)
-    return file_content
+    file_content_raw, _ = read_text_file(file, encoding=encoding)
+    return file_content_raw


 def extract_file_text(
@@ -379,13 +352,9 @@ def extract_file_text(
    break_on_unprocessable: bool = True,
    extension: str | None = None,
 ) -> str:
-    """
-    Legacy function that returns *only text*, ignoring embedded images.
-    For backward-compatibility in code that only wants text.
-    """
    extension_to_function: dict[str, Callable[[IO[Any]], str]] = {
        ".pdf": pdf_to_text,
-        ".docx": lambda f: docx_to_text_and_images(f)[0],  # no images
+        ".docx": docx_to_text,
        ".pptx": pptx_to_text,
        ".xlsx": xlsx_to_text,
        ".eml": eml_to_text,
@@ -399,23 +368,24 @@ def extract_file_text(
                return unstructured_to_text(file, file_name)
            except Exception as unstructured_error:
                logger.error(
-                    f"Failed to process with Unstructured: {str(unstructured_error)}. "
-                    "Falling back to normal processing."
+                    f"Failed to process with Unstructured: {str(unstructured_error)}. Falling back to normal processing."
                )
-        if extension is None:
-            extension = get_file_ext(file_name)
+                # Fall through to normal processing
+        final_extension: str
+        if file_name or extension:
+            if extension is not None:
+                final_extension = extension
+            elif file_name is not None:
+                final_extension = get_file_ext(file_name)

-        if is_valid_file_ext(extension):
-            func = extension_to_function.get(extension, file_io_to_text)
-            file.seek(0)
-            return func(file)
+            if is_valid_file_ext(final_extension):
+                return extension_to_function.get(final_extension, file_io_to_text)(file)

-        # If unknown extension, maybe it's a text file
-        file.seek(0)
+        # Either the file somehow has no name or the extension is not one that we recognize
        if is_text_file(file):
            return file_io_to_text(file)

-        raise ValueError("Unknown file extension or not recognized as text data")
+        raise ValueError("Unknown file extension and unknown text encoding")

    except Exception as e:
        if break_on_unprocessable:
@@ -426,93 +396,20 @@ def extract_file_text(
        return ""


-def extract_text_and_images(
-    file: IO[Any],
-    file_name: str,
-    pdf_pass: str | None = None,
-) -> Tuple[str, List[Tuple[bytes, str]]]:
-    """
-    Primary new function for the updated connector.
-    Returns (text_content, [(embedded_img_bytes, embedded_img_name), ...]).
-    """
-
-    try:
-        # Attempt unstructured if env var is set
-        if get_unstructured_api_key():
-            # If the user doesn't want embedded images, unstructured is fine
-            file.seek(0)
-            text_content = unstructured_to_text(file, file_name)
-            return (text_content, [])
-
-        extension = get_file_ext(file_name)
-
-        # docx example for embedded images
-        if extension == ".docx":
-            file.seek(0)
-            text_content, images = docx_to_text_and_images(file)
-            return (text_content, images)
-
-        # PDF example: we do not show complicated PDF image extraction here
-        # so we simply extract text for now and skip images.
-        if extension == ".pdf":
-            file.seek(0)
-            text_content, _, images = read_pdf_file(file, pdf_pass, extract_images=True)
-            return (text_content, images)
-
-        # For PPTX, XLSX, EML, etc., we do not show embedded image logic here.
-        # You can do something similar to docx if needed.
-        if extension == ".pptx":
-            file.seek(0)
-            return (pptx_to_text(file), [])
-
-        if extension == ".xlsx":
-            file.seek(0)
-            return (xlsx_to_text(file), [])
-
-        if extension == ".eml":
-            file.seek(0)
-            return (eml_to_text(file), [])
-
-        if extension == ".epub":
-            file.seek(0)
-            return (epub_to_text(file), [])
-
-        if extension == ".html":
-            file.seek(0)
-            return (parse_html_page_basic(file), [])
-
-        # If we reach here and it's a recognized text extension
-        if is_text_file_extension(file_name):
-            file.seek(0)
-            encoding = detect_encoding(file)
-            text_content_raw, _ = read_text_file(
-                file, encoding=encoding, ignore_onyx_metadata=False
-            )
-            return (text_content_raw, [])
-
-        # If it's an image file or something else, we do not parse embedded images from them
-        # just return empty text
-        file.seek(0)
-        return ("", [])
-
-    except Exception as e:
-        logger.exception(f"Failed to extract text/images from {file_name}: {e}")
-        return ("", [])
-
-
 def convert_docx_to_txt(
    file: UploadFile, file_store: FileStore, file_path: str
 ) -> None:
-    """
-    Helper to convert docx to a .txt file in the same filestore.
-    """
    file.file.seek(0)
    docx_content = file.file.read()
-    doc = DocxDocument(BytesIO(docx_content))
+    doc = Document(BytesIO(docx_content))

    # Extract text from the document
-    all_paras = [p.text for p in doc.paragraphs]
-    text_content = "\n".join(all_paras)
+    full_text = []
+    for para in doc.paragraphs:
+        full_text.append(para.text)
+
+    # Join the extracted text
+    text_content = "\n".join(full_text)

    txt_file_path = docx_to_txt_filename(file_path)
    file_store.save_file(
@@ -525,4 +422,7 @@ def convert_docx_to_txt(


 def docx_to_txt_filename(file_path: str) -> str:
+    """
+    Convert a .docx file path to its corresponding .txt file path.
+    """
    return file_path.rsplit(".", 1)[0] + ".txt"
--- a/backend/onyx/file_processing/file_validation.py
+++ b/backend/onyx/file_processing/file_validation.py
@@ -1,46 +0,0 @@
-"""
-Centralized file type validation utilities.
-"""
-# Standard image MIME types supported by most vision LLMs
-IMAGE_MIME_TYPES = [
-    "image/png",
-    "image/jpeg",
-    "image/jpg",
-    "image/webp",
-]
-
-# Image types that should be excluded from processing
-EXCLUDED_IMAGE_TYPES = [
-    "image/bmp",
-    "image/tiff",
-    "image/gif",
-    "image/svg+xml",
-]
-
-
-def is_valid_image_type(mime_type: str) -> bool:
-    """
-    Check if mime_type is a valid image type.
-
-    Args:
-        mime_type: The MIME type to check
-
-    Returns:
-        True if the MIME type is a valid image type, False otherwise
-    """
-    if not mime_type:
-        return False
-    return mime_type.startswith("image/") and mime_type not in EXCLUDED_IMAGE_TYPES
-
-
-def is_supported_by_vision_llm(mime_type: str) -> bool:
-    """
-    Check if this image type can be processed by vision LLMs.
-
-    Args:
-        mime_type: The MIME type to check
-
-    Returns:
-        True if the MIME type is supported by vision LLMs, False otherwise
-    """
-    return mime_type in IMAGE_MIME_TYPES
--- a/backend/onyx/file_processing/image_summarization.py
+++ b/backend/onyx/file_processing/image_summarization.py
@@ -1,129 +0,0 @@
-import base64
-from io import BytesIO
-
-from langchain_core.messages import BaseMessage
-from langchain_core.messages import HumanMessage
-from langchain_core.messages import SystemMessage
-from PIL import Image
-
-from onyx.llm.interfaces import LLM
-from onyx.llm.utils import message_to_string
-from onyx.prompts.image_analysis import IMAGE_SUMMARIZATION_SYSTEM_PROMPT
-from onyx.prompts.image_analysis import IMAGE_SUMMARIZATION_USER_PROMPT
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def prepare_image_bytes(image_data: bytes) -> str:
-    """Prepare image bytes for summarization.
-    Resizes image if it's larger than 20MB. Encodes image as a base64 string."""
-    image_data = _resize_image_if_needed(image_data)
-
-    # encode image (base64)
-    encoded_image = _encode_image_for_llm_prompt(image_data)
-
-    return encoded_image
-
-
-def summarize_image_pipeline(
-    llm: LLM,
-    image_data: bytes,
-    query: str | None = None,
-    system_prompt: str | None = None,
-) -> str:
-    """Pipeline to generate a summary of an image.
-    Resizes images if it is bigger than 20MB. Encodes image as a base64 string.
-    And finally uses the Default LLM to generate a textual summary of the image."""
-    # resize image if it's bigger than 20MB
-    encoded_image = prepare_image_bytes(image_data)
-
-    summary = _summarize_image(
-        encoded_image,
-        llm,
-        query,
-        system_prompt,
-    )
-
-    return summary
-
-
-def summarize_image_with_error_handling(
-    llm: LLM | None,
-    image_data: bytes,
-    context_name: str,
-    system_prompt: str = IMAGE_SUMMARIZATION_SYSTEM_PROMPT,
-    user_prompt_template: str = IMAGE_SUMMARIZATION_USER_PROMPT,
-) -> str | None:
-    """Wrapper function that handles error cases and configuration consistently.
-
-    Args:
-        llm: The LLM with vision capabilities to use for summarization
-        image_data: The raw image bytes
-        context_name: Name or title of the image for context
-        system_prompt: System prompt to use for the LLM
-        user_prompt_template: Template for the user prompt, should contain {title} placeholder
-
-    Returns:
-        The image summary text, or None if summarization failed or is disabled
-    """
-    if llm is None:
-        return None
-
-    user_prompt = user_prompt_template.format(title=context_name)
-    return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
-
-
-def _summarize_image(
-    encoded_image: str,
-    llm: LLM,
-    query: str | None = None,
-    system_prompt: str | None = None,
-) -> str:
-    """Use default LLM (if it is multimodal) to generate a summary of an image."""
-
-    messages: list[BaseMessage] = []
-
-    if system_prompt:
-        messages.append(SystemMessage(content=system_prompt))
-
-    messages.append(
-        HumanMessage(
-            content=[
-                {"type": "text", "text": query},
-                {"type": "image_url", "image_url": {"url": encoded_image}},
-            ],
-        ),
-    )
-
-    try:
-        return message_to_string(llm.invoke(messages))
-
-    except Exception as e:
-        raise ValueError(f"Summarization failed. Messages: {messages}") from e
-
-
-def _encode_image_for_llm_prompt(image_data: bytes) -> str:
-    """Getting the base64 string."""
-    base64_encoded_data = base64.b64encode(image_data).decode("utf-8")
-
-    return f"data:image/jpeg;base64,{base64_encoded_data}"
-
-
-def _resize_image_if_needed(image_data: bytes, max_size_mb: int = 20) -> bytes:
-    """Resize image if it's larger than the specified max size in MB."""
-    max_size_bytes = max_size_mb * 1024 * 1024
-
-    if len(image_data) > max_size_bytes:
-        with Image.open(BytesIO(image_data)) as img:
-            # Reduce dimensions for better size reduction
-            img.thumbnail((1024, 1024), Image.Resampling.LANCZOS)
-            output = BytesIO()
-
-            # Save with lower quality for compression
-            img.save(output, format="JPEG", quality=85)
-            resized_data = output.getvalue()
-
-            return resized_data
-
-    return image_data
--- a/backend/onyx/file_processing/image_utils.py
+++ b/backend/onyx/file_processing/image_utils.py
@@ -1,70 +0,0 @@
-from typing import Tuple
-
-from sqlalchemy.orm import Session
-
-from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
-from onyx.configs.constants import FileOrigin
-from onyx.connectors.models import Section
-from onyx.db.pg_file_store import save_bytes_to_pgfilestore
-from onyx.file_processing.image_summarization import summarize_image_with_error_handling
-from onyx.llm.interfaces import LLM
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def store_image_and_create_section(
-    db_session: Session,
-    image_data: bytes,
-    file_name: str,
-    display_name: str,
-    media_type: str = "image/unknown",
-    llm: LLM | None = None,
-    file_origin: FileOrigin = FileOrigin.OTHER,
-) -> Tuple[Section, str | None]:
-    """
-    Stores an image in PGFileStore and creates a Section object with optional summarization.
-
-    Args:
-        db_session: Database session
-        image_data: Raw image bytes
-        file_name: Base identifier for the file
-        display_name: Human-readable name for the image
-        media_type: MIME type of the image
-        llm: Optional LLM with vision capabilities for summarization
-        file_origin: Origin of the file (e.g., CONFLUENCE, GOOGLE_DRIVE, etc.)
-
-    Returns:
-        Tuple containing:
-        - Section object with image reference and optional summary text
-        - The file_name in PGFileStore or None if storage failed
-    """
-    # Storage logic
-    stored_file_name = None
-    try:
-        pgfilestore = save_bytes_to_pgfilestore(
-            db_session=db_session,
-            raw_bytes=image_data,
-            media_type=media_type,
-            identifier=file_name,
-            display_name=display_name,
-            file_origin=file_origin,
-        )
-        stored_file_name = pgfilestore.file_name
-    except Exception as e:
-        logger.error(f"Failed to store image: {e}")
-        if not CONTINUE_ON_CONNECTOR_FAILURE:
-            raise
-        return Section(text=""), None
-
-    # Summarization logic
-    summary_text = ""
-    if llm:
-        summary_text = (
-            summarize_image_with_error_handling(llm, image_data, display_name) or ""
-        )
-
-    return (
-        Section(text=summary_text, image_file_name=stored_file_name),
-        stored_file_name,
-    )
--- a/backend/onyx/indexing/chunker.py
+++ b/backend/onyx/indexing/chunker.py
@@ -23,9 +23,12 @@ from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
 CHUNK_OVERLAP = 0
 # Fairly arbitrary numbers but the general concept is we don't want the title/metadata to
 # overwhelm the actual contents of the chunk
+# For example, in a rare case the metadata could take 128 tokens of a 512-token chunk and the title prefix
+# could take another 128 tokens, leaving 256 for the actual contents
 MAX_METADATA_PERCENTAGE = 0.25
 CHUNK_MIN_CONTENT = 256

+
 logger = setup_logger()


@@ -33,8 +36,16 @@ def _get_metadata_suffix_for_document_index(
    metadata: dict[str, str | list[str]], include_separator: bool = False
 ) -> tuple[str, str]:
    """
-    Returns the metadata as a natural language string representation with all of the keys and values
-    for the vector embedding and a string of all of the values for the keyword search.
+    Returns the metadata as a natural language string representation with all of the keys and values for the vector embedding
+    and a string of all of the values for the keyword search
+
+    For example, if we have the following metadata:
+    {
+        "author": "John Doe",
+        "space": "Engineering"
+    }
+    The vector embedding string should include the relation between the key and value, whereas for keyword search we only want
+    John Doe and Engineering. The keys are repetitive and much noisier.
    """
    if not metadata:
        return "", ""
@@ -63,17 +74,12 @@ def _get_metadata_suffix_for_document_index(


 def _combine_chunks(chunks: list[DocAwareChunk], large_chunk_id: int) -> DocAwareChunk:
-    """
-    Combines multiple DocAwareChunks into one large chunk (for “multipass” mode),
-    appending the content and adjusting source_links accordingly.
-    """
    merged_chunk = DocAwareChunk(
        source_document=chunks[0].source_document,
        chunk_id=chunks[0].chunk_id,
        blurb=chunks[0].blurb,
        content=chunks[0].content,
        source_links=chunks[0].source_links or {},
-        image_file_name=None,
        section_continuation=(chunks[0].chunk_id > 0),
        title_prefix=chunks[0].title_prefix,
        metadata_suffix_semantic=chunks[0].metadata_suffix_semantic,
@@ -97,9 +103,6 @@ def _combine_chunks(chunks: list[DocAwareChunk], large_chunk_id: int) -> DocAwar


 def generate_large_chunks(chunks: list[DocAwareChunk]) -> list[DocAwareChunk]:
-    """
-    Generates larger “grouped” chunks by combining sets of smaller chunks.
-    """
    large_chunks = []
    for idx, i in enumerate(range(0, len(chunks), LARGE_CHUNK_RATIO)):
        chunk_group = chunks[i : i + LARGE_CHUNK_RATIO]
@@ -169,60 +172,23 @@ class Chunker:
        while start < total_tokens:
            end = min(start + content_token_limit, total_tokens)
            token_chunk = tokens[start:end]
+            # Join the tokens to reconstruct the text
            chunk_text = " ".join(token_chunk)
            chunks.append(chunk_text)
            start = end
        return chunks

    def _extract_blurb(self, text: str) -> str:
-        """
-        Extract a short blurb from the text (first chunk of size `blurb_size`).
-        """
        texts = self.blurb_splitter.split_text(text)
        if not texts:
            return ""
        return texts[0]

    def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:
-        """
-        For “multipass” mode: additional sub-chunks (mini-chunks) for use in certain embeddings.
-        """
        if self.mini_chunk_splitter and chunk_text.strip():
            return self.mini_chunk_splitter.split_text(chunk_text)
        return None

-    # ADDED: extra param image_url to store in the chunk
-    def _create_chunk(
-        self,
-        document: Document,
-        chunks_list: list[DocAwareChunk],
-        text: str,
-        links: dict[int, str],
-        is_continuation: bool = False,
-        title_prefix: str = "",
-        metadata_suffix_semantic: str = "",
-        metadata_suffix_keyword: str = "",
-        image_file_name: str | None = None,
-    ) -> None:
-        """
-        Helper to create a new DocAwareChunk, append it to chunks_list.
-        """
-        new_chunk = DocAwareChunk(
-            source_document=document,
-            chunk_id=len(chunks_list),
-            blurb=self._extract_blurb(text),
-            content=text,
-            source_links=links or {0: ""},
-            image_file_name=image_file_name,
-            section_continuation=is_continuation,
-            title_prefix=title_prefix,
-            metadata_suffix_semantic=metadata_suffix_semantic,
-            metadata_suffix_keyword=metadata_suffix_keyword,
-            mini_chunk_texts=self._get_mini_chunk_texts(text),
-            large_chunk_id=None,
-        )
-        chunks_list.append(new_chunk)
-
    def _chunk_document(
        self,
        document: Document,
@@ -232,156 +198,122 @@ class Chunker:
        content_token_limit: int,
    ) -> list[DocAwareChunk]:
        """
-        Loops through sections of the document, converting them into one or more chunks.
-        If a section has an image_link, we treat it as a dedicated chunk.
+        Loops through sections of the document, adds metadata and converts them into chunks.
        """
-
        chunks: list[DocAwareChunk] = []
        link_offsets: dict[int, str] = {}
        chunk_text = ""

+        def _create_chunk(
+            text: str,
+            links: dict[int, str],
+            is_continuation: bool = False,
+        ) -> DocAwareChunk:
+            return DocAwareChunk(
+                source_document=document,
+                chunk_id=len(chunks),
+                blurb=self._extract_blurb(text),
+                content=text,
+                source_links=links or {0: ""},
+                section_continuation=is_continuation,
+                title_prefix=title_prefix,
+                metadata_suffix_semantic=metadata_suffix_semantic,
+                metadata_suffix_keyword=metadata_suffix_keyword,
+                mini_chunk_texts=self._get_mini_chunk_texts(text),
+                large_chunk_id=None,
+            )
+
+        section_link_text: str
+
        for section_idx, section in enumerate(document.sections):
            section_text = clean_text(section.text)
            section_link_text = section.link or ""
-            # ADDED: if the Section has an image link
-            image_url = section.image_file_name
-
-            # If there is no useful content, skip
+            # If there is no useful content, not even the title, just drop it
            if not section_text and (not document.title or section_idx > 0):
+                # If a section is empty and the document has no title, we can just drop it. We return a list of
+                # DocAwareChunks where each one contains the necessary information needed down the line for indexing.
+                # There is no concern about dropping whole documents from this list; doing so should not cause any indexing failures.
                logger.warning(
-                    f"Skipping empty or irrelevant section in doc "
-                    f"{document.semantic_identifier}, link={section_link_text}"
+                    f"Skipping section {section.text} from document "
+                    f"{document.semantic_identifier} due to empty text after cleaning "
+                    f"with link {section_link_text}"
                )
                continue

-            # CASE 1: If this is an image section, force a separate chunk
-            if image_url:
-                # First, if we have any partially built text chunk, finalize it
-                if chunk_text.strip():
-                    self._create_chunk(
-                        document,
-                        chunks,
-                        chunk_text,
-                        link_offsets,
-                        is_continuation=False,
-                        title_prefix=title_prefix,
-                        metadata_suffix_semantic=metadata_suffix_semantic,
-                        metadata_suffix_keyword=metadata_suffix_keyword,
-                    )
-                    chunk_text = ""
-                    link_offsets = {}
-
-                # Create a chunk specifically for this image
-                # (If the section has text describing the image, use that as content)
-                self._create_chunk(
-                    document,
-                    chunks,
-                    section_text,
-                    links={0: section_link_text}
-                    if section_link_text
-                    else {},  # No text offsets needed for images
-                    image_file_name=image_url,
-                    title_prefix=title_prefix,
-                    metadata_suffix_semantic=metadata_suffix_semantic,
-                    metadata_suffix_keyword=metadata_suffix_keyword,
-                )
-                # Continue to next section
-                continue
-
-            # CASE 2: Normal text section
            section_token_count = len(self.tokenizer.tokenize(section_text))

-            # If the section is large on its own, split it separately
+            # Large sections are considered self-contained/unique
+            # Therefore, they start a new chunk and are not concatenated
+            # at the end by other sections
            if section_token_count > content_token_limit:
-                if chunk_text.strip():
-                    self._create_chunk(
-                        document,
-                        chunks,
-                        chunk_text,
-                        link_offsets,
-                        False,
-                        title_prefix,
-                        metadata_suffix_semantic,
-                        metadata_suffix_keyword,
-                    )
-                    chunk_text = ""
+                if chunk_text:
+                    chunks.append(_create_chunk(chunk_text, link_offsets))
                    link_offsets = {}
+                    chunk_text = ""

                split_texts = self.chunk_splitter.split_text(section_text)
+
                for i, split_text in enumerate(split_texts):
-                    # If even the split_text is bigger than strict limit, further split
                    if (
                        STRICT_CHUNK_TOKEN_LIMIT
-                        and len(self.tokenizer.tokenize(split_text))
-                        > content_token_limit
+                        and
+                        # Tokenizer only runs if STRICT_CHUNK_TOKEN_LIMIT is true
+                        len(self.tokenizer.tokenize(split_text)) > content_token_limit
                    ):
+                        # If STRICT_CHUNK_TOKEN_LIMIT is true, manually check
+                        # the token count of each split text to ensure it is
+                        # not larger than the content_token_limit
                        smaller_chunks = self._split_oversized_chunk(
                            split_text, content_token_limit
                        )
-                        for j, small_chunk in enumerate(smaller_chunks):
-                            self._create_chunk(
-                                document,
-                                chunks,
-                                small_chunk,
-                                {0: section_link_text},
-                                is_continuation=(j != 0),
-                                title_prefix=title_prefix,
-                                metadata_suffix_semantic=metadata_suffix_semantic,
-                                metadata_suffix_keyword=metadata_suffix_keyword,
+                        for i, small_chunk in enumerate(smaller_chunks):
+                            chunks.append(
+                                _create_chunk(
+                                    text=small_chunk,
+                                    links={0: section_link_text},
+                                    is_continuation=(i != 0),
+                                )
                            )
                    else:
-                        self._create_chunk(
-                            document,
-                            chunks,
-                            split_text,
-                            {0: section_link_text},
-                            is_continuation=(i != 0),
-                            title_prefix=title_prefix,
-                            metadata_suffix_semantic=metadata_suffix_semantic,
-                            metadata_suffix_keyword=metadata_suffix_keyword,
+                        chunks.append(
+                            _create_chunk(
+                                text=split_text,
+                                links={0: section_link_text},
+                                is_continuation=(i != 0),
+                            )
                        )
+
                continue

-            # If we can still fit this section into the current chunk, do so
            current_token_count = len(self.tokenizer.tokenize(chunk_text))
            current_offset = len(shared_precompare_cleanup(chunk_text))
+            # In the case where the whole section is shorter than a chunk, either add
+            # to chunk or start a new one
            next_section_tokens = (
                len(self.tokenizer.tokenize(SECTION_SEPARATOR)) + section_token_count
            )
-
            if next_section_tokens + current_token_count <= content_token_limit:
                if chunk_text:
                    chunk_text += SECTION_SEPARATOR
                chunk_text += section_text
                link_offsets[current_offset] = section_link_text
            else:
-                # finalize the existing chunk
-                self._create_chunk(
-                    document,
-                    chunks,
-                    chunk_text,
-                    link_offsets,
-                    False,
-                    title_prefix,
-                    metadata_suffix_semantic,
-                    metadata_suffix_keyword,
-                )
-                # start a new chunk
+                chunks.append(_create_chunk(chunk_text, link_offsets))
                link_offsets = {0: section_link_text}
                chunk_text = section_text

-        # finalize any leftover text chunk
+        # Once we hit the end, if we're still in the process of building a chunk, add what we have.
+        # If there is only whitespace left then don't include it. If there are no chunks at all
+        # from the doc, we can just create a single chunk with the title.
        if chunk_text.strip() or not chunks:
-            self._create_chunk(
-                document,
-                chunks,
-                chunk_text,
-                link_offsets or {0: ""},  # safe default
-                False,
-                title_prefix,
-                metadata_suffix_semantic,
-                metadata_suffix_keyword,
+            chunks.append(
+                _create_chunk(
+                    chunk_text,
+                    link_offsets or {0: section_link_text},
+                )
            )
+
+        # If the chunk does not have any useable content, it will not be indexed
        return chunks

    def _handle_single_document(self, document: Document) -> list[DocAwareChunk]:
@@ -389,12 +321,10 @@ class Chunker:
        if document.source == DocumentSource.GMAIL:
            logger.debug(f"Chunking {document.semantic_identifier}")

-        # Title prep
        title = self._extract_blurb(document.get_title_for_document_index() or "")
        title_prefix = title + RETURN_SEPARATOR if title else ""
        title_tokens = len(self.tokenizer.tokenize(title_prefix))

-        # Metadata prep
        metadata_suffix_semantic = ""
        metadata_suffix_keyword = ""
        metadata_tokens = 0
@@ -407,20 +337,19 @@ class Chunker:
            )
            metadata_tokens = len(self.tokenizer.tokenize(metadata_suffix_semantic))

-        # If metadata is too large, skip it in the semantic content
        if metadata_tokens >= self.chunk_token_limit * MAX_METADATA_PERCENTAGE:
+            # Note: we can keep the keyword suffix even if the semantic suffix is too long to fit in the model
+            # context, there is no limit for the keyword component
            metadata_suffix_semantic = ""
            metadata_tokens = 0

-        # Adjust content token limit to accommodate title + metadata
        content_token_limit = self.chunk_token_limit - title_tokens - metadata_tokens
+        # If there is not enough context remaining then just index the chunk with no prefix/suffix
        if content_token_limit <= CHUNK_MIN_CONTENT:
-            # Not enough space left, so revert to full chunk without the prefix
            content_token_limit = self.chunk_token_limit
            title_prefix = ""
            metadata_suffix_semantic = ""

-        # Chunk the document
        normal_chunks = self._chunk_document(
            document,
            title_prefix,
@@ -429,7 +358,6 @@ class Chunker:
            content_token_limit,
        )

-        # Optional “multipass” large chunk creation
        if self.enable_multipass and self.enable_large_chunks:
            large_chunks = generate_large_chunks(normal_chunks)
            normal_chunks.extend(large_chunks)
@@ -443,8 +371,9 @@ class Chunker:
        """
        final_chunks: list[DocAwareChunk] = []
        for document in documents:
-            if self.callback and self.callback.should_stop():
-                raise RuntimeError("Chunker.chunk: Stop signal detected")
+            if self.callback:
+                if self.callback.should_stop():
+                    raise RuntimeError("Chunker.chunk: Stop signal detected")

            chunks = self._handle_single_document(document)
            final_chunks.extend(chunks)
--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -464,29 +464,12 @@ def index_doc_batch(
            ),
        )

-        all_returned_doc_ids = (
-            {record.document_id for record in insertion_records}
-            .union(
-                {
-                    record.failed_document.document_id
-                    for record in vector_db_write_failures
-                    if record.failed_document
-                }
-            )
-            .union(
-                {
-                    record.failed_document.document_id
-                    for record in embedding_failures
-                    if record.failed_document
-                }
-            )
-        )
-        if all_returned_doc_ids != set(updatable_ids):
+        successful_doc_ids = {record.document_id for record in insertion_records}
+        if successful_doc_ids != set(updatable_ids):
            raise RuntimeError(
                f"Some documents were not successfully indexed. "
                f"Updatable IDs: {updatable_ids}, "
-                f"Returned IDs: {all_returned_doc_ids}. "
-                "This should never happen."
+                f"Successful IDs: {successful_doc_ids}"
            )

        last_modified_ids = []
--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -29,7 +29,6 @@ class BaseChunk(BaseModel):
    content: str
    # Holds the link and the offsets into the raw Chunk text
    source_links: dict[int, str] | None
-    image_file_name: str | None
    # True if this Chunk's start is not at the start of a Section
    section_continuation: bool

--- a/backend/onyx/llm/chat_llm.py
+++ b/backend/onyx/llm/chat_llm.py
@@ -167,7 +167,7 @@ def _convert_delta_to_message_chunk(
    stop_reason: str | None = None,
 ) -> BaseMessageChunk:
    """Adapted from langchain_community.chat_models.litellm._convert_delta_to_message_chunk"""
-    role = _dict.get("role") or (_base_msg_to_role(curr_msg) if curr_msg else "unknown")
+    role = _dict.get("role") or (_base_msg_to_role(curr_msg) if curr_msg else None)
    content = _dict.get("content") or ""
    additional_kwargs = {}
    if _dict.get("function_call"):
@@ -402,7 +402,6 @@ class DefaultMultiLLM(LLM):
        stream: bool,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> litellm.ModelResponse | litellm.CustomStreamWrapper:
        # litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
        # to a dict representation
@@ -430,7 +429,6 @@ class DefaultMultiLLM(LLM):
                # model params
                temperature=0,
                timeout=timeout_override or self._timeout,
-                max_tokens=max_tokens,
                # For now, we don't support parallel tool calls
                # NOTE: we can't pass this in if tools are not specified
                # or else OpenAI throws an error
@@ -486,7 +484,6 @@ class DefaultMultiLLM(LLM):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> BaseMessage:
        if LOG_DANSWER_MODEL_INTERACTIONS:
            self.log_model_configs()
@@ -500,7 +497,6 @@ class DefaultMultiLLM(LLM):
                stream=False,
                structured_response_format=structured_response_format,
                timeout_override=timeout_override,
-                max_tokens=max_tokens,
            ),
        )
        choice = response.choices[0]
@@ -519,7 +515,6 @@ class DefaultMultiLLM(LLM):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> Iterator[BaseMessage]:
        if LOG_DANSWER_MODEL_INTERACTIONS:
            self.log_model_configs()
@@ -544,7 +539,6 @@ class DefaultMultiLLM(LLM):
                stream=True,
                structured_response_format=structured_response_format,
                timeout_override=timeout_override,
-                max_tokens=max_tokens,
            ),
        )
        try:
--- a/backend/onyx/llm/custom_llm.py
+++ b/backend/onyx/llm/custom_llm.py
@@ -82,7 +82,6 @@ class CustomModelServer(LLM):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> BaseMessage:
        return self._execute(prompt)

@@ -93,6 +92,5 @@ class CustomModelServer(LLM):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> Iterator[BaseMessage]:
        yield self._execute(prompt)
--- a/backend/onyx/llm/factory.py
+++ b/backend/onyx/llm/factory.py
@@ -6,14 +6,12 @@ from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
 from onyx.configs.model_configs import GEN_AI_TEMPERATURE
 from onyx.db.engine import get_session_context_manager
 from onyx.db.llm import fetch_default_provider
-from onyx.db.llm import fetch_existing_llm_providers
 from onyx.db.llm import fetch_provider
 from onyx.db.models import Persona
 from onyx.llm.chat_llm import DefaultMultiLLM
 from onyx.llm.exceptions import GenAIDisabledException
 from onyx.llm.interfaces import LLM
 from onyx.llm.override_models import LLMOverride
-from onyx.llm.utils import model_supports_image_input
 from onyx.utils.headers import build_llm_extra_headers
 from onyx.utils.logger import setup_logger
 from onyx.utils.long_term_log import LongTermLogger
@@ -88,48 +86,6 @@ def get_llms_for_persona(
    return _create_llm(model), _create_llm(fast_model)


-def get_default_llm_with_vision(
-    timeout: int | None = None,
-    temperature: float | None = None,
-    additional_headers: dict[str, str] | None = None,
-    long_term_logger: LongTermLogger | None = None,
-) -> LLM | None:
-    if DISABLE_GENERATIVE_AI:
-        raise GenAIDisabledException()
-
-    with get_session_context_manager() as db_session:
-        llm_providers = fetch_existing_llm_providers(db_session)
-
-    if not llm_providers:
-        return None
-
-    for provider in llm_providers:
-        model_name = provider.default_model_name
-        fast_model_name = (
-            provider.fast_default_model_name or provider.default_model_name
-        )
-
-        if not model_name or not fast_model_name:
-            continue
-
-        if model_supports_image_input(model_name, provider.provider):
-            return get_llm(
-                provider=provider.provider,
-                model=model_name,
-                deployment_name=provider.deployment_name,
-                api_key=provider.api_key,
-                api_base=provider.api_base,
-                api_version=provider.api_version,
-                custom_config=provider.custom_config,
-                timeout=timeout,
-                temperature=temperature,
-                additional_headers=additional_headers,
-                long_term_logger=long_term_logger,
-            )
-
-    raise ValueError("No LLM provider found that supports image input")
-
-
 def get_default_llms(
    timeout: int | None = None,
    temperature: float | None = None,
--- a/backend/onyx/llm/interfaces.py
+++ b/backend/onyx/llm/interfaces.py
@@ -91,18 +91,12 @@ class LLM(abc.ABC):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> BaseMessage:
        self._precall(prompt)
        # TODO add a postcall to log model outputs independent of concrete class
        # implementation
        return self._invoke_implementation(
-            prompt,
-            tools,
-            tool_choice,
-            structured_response_format,
-            timeout_override,
-            max_tokens,
+            prompt, tools, tool_choice, structured_response_format, timeout_override
        )

    @abc.abstractmethod
@@ -113,7 +107,6 @@ class LLM(abc.ABC):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> BaseMessage:
        raise NotImplementedError

@@ -124,18 +117,12 @@ class LLM(abc.ABC):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> Iterator[BaseMessage]:
        self._precall(prompt)
        # TODO add a postcall to log model outputs independent of concrete class
        # implementation
        messages = self._stream_implementation(
-            prompt,
-            tools,
-            tool_choice,
-            structured_response_format,
-            timeout_override,
-            max_tokens,
+            prompt, tools, tool_choice, structured_response_format, timeout_override
        )

        tokens = []
@@ -155,6 +142,5 @@ class LLM(abc.ABC):
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
        timeout_override: int | None = None,
-        max_tokens: int | None = None,
    ) -> Iterator[BaseMessage]:
        raise NotImplementedError
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -51,7 +51,6 @@ from onyx.server.documents.cc_pair import router as cc_pair_router
 from onyx.server.documents.connector import router as connector_router
 from onyx.server.documents.credential import router as credential_router
 from onyx.server.documents.document import router as document_router
-from onyx.server.documents.standard_oauth import router as standard_oauth_router
 from onyx.server.features.document_set.api import router as document_set_router
 from onyx.server.features.folder.api import router as folder_router
 from onyx.server.features.input_prompt.api import (
@@ -323,7 +322,6 @@ def get_application() -> FastAPI:
    )
    include_router_with_global_prefix_prepended(application, long_term_logs_router)
    include_router_with_global_prefix_prepended(application, api_key_router)
-    include_router_with_global_prefix_prepended(application, standard_oauth_router)

    if AUTH_TYPE == AuthType.DISABLED:
        # Server logs this during auth setup verification step
--- a/backend/onyx/prompts/image_analysis.py
+++ b/backend/onyx/prompts/image_analysis.py
@@ -1,22 +0,0 @@
-# Used for creating embeddings of images for vector search
-IMAGE_SUMMARIZATION_SYSTEM_PROMPT = """
-You are an assistant for summarizing images for retrieval.
-Summarize the content of the following image and be as precise as possible.
-The summary will be embedded and used to retrieve the original image.
-Therefore, write a concise summary of the image that is optimized for retrieval.
-"""
-
-# Prompt for generating image descriptions with filename context
-IMAGE_SUMMARIZATION_USER_PROMPT = """
-The image has the file name '{title}'.
-Describe precisely and concisely what the image shows.
-"""
-
-
-# Used for analyzing images in response to user queries at search time
-IMAGE_ANALYSIS_SYSTEM_PROMPT = (
-    "You are an AI assistant specialized in describing images.\n"
-    "You will receive a user question plus an image URL. Provide a concise textual answer.\n"
-    "Focus on aspects of the image that are relevant to the user's question.\n"
-    "Be specific and detailed about visual elements that directly address the query.\n"
-)
--- a/backend/onyx/seeding/load_docs.py
+++ b/backend/onyx/seeding/load_docs.py
@@ -55,11 +55,7 @@ def _create_indexable_chunks(
            # The section is not really used past this point since we have already done the other processing
            # for the chunking and embedding.
            sections=[
-                Section(
-                    text=preprocessed_doc["content"],
-                    link=preprocessed_doc["url"],
-                    image_file_name=None,
-                )
+                Section(text=preprocessed_doc["content"], link=preprocessed_doc["url"])
            ],
            source=DocumentSource.WEB,
            semantic_identifier=preprocessed_doc["title"],
@@ -97,7 +93,6 @@ def _create_indexable_chunks(
            document_sets=set(),
            boost=DEFAULT_BOOST,
            large_chunk_id=None,
-            image_file_name=None,
        )

        chunks.append(chunk)
--- a/backend/onyx/server/settings/models.py
+++ b/backend/onyx/server/settings/models.py
@@ -53,11 +53,6 @@ class Settings(BaseModel):
    auto_scroll: bool | None = False
    query_history_type: QueryHistoryType | None = None

-    # Image processing settings
-    image_extraction_and_analysis_enabled: bool | None = False
-    search_time_image_analysis_enabled: bool | None = False
-    image_analysis_max_size_mb: int | None = 20
-

 class UserSettings(Settings):
    notifications: list[Notification]
--- a/backend/onyx/server/settings/store.py
+++ b/backend/onyx/server/settings/store.py
@@ -47,7 +47,6 @@ def load_settings() -> Settings:

    settings.anonymous_user_enabled = anonymous_user_enabled
    settings.query_history_type = ONYX_QUERY_HISTORY_TYPE
-
    return settings


--- a/backend/onyx/tools/models.py
+++ b/backend/onyx/tools/models.py
@@ -9,7 +9,6 @@ from sqlalchemy.orm import Session
 from onyx.context.search.enums import SearchType
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceSection
-from shared_configs.model_server_models import Embedding


 class ToolResponse(BaseModel):
@@ -61,15 +60,11 @@ class SearchQueryInfo(BaseModel):
    recency_bias_multiplier: float


-# None indicates that the default value should be used
 class SearchToolOverrideKwargs(BaseModel):
-    force_no_rerank: bool | None = None
-    alternate_db_session: Session | None = None
-    retrieved_sections_callback: Callable[[list[InferenceSection]], None] | None = None
-    skip_query_analysis: bool | None = None
-    precomputed_query_embedding: Embedding | None = None
-    precomputed_is_keyword: bool | None = None
-    precomputed_keywords: list[str] | None = None
+    force_no_rerank: bool
+    alternate_db_session: Session | None
+    retrieved_sections_callback: Callable[[list[InferenceSection]], None] | None
+    skip_query_analysis: bool

    class Config:
        arbitrary_types_allowed = True
--- a/backend/onyx/tools/tool_implementations/search/search_tool.py
+++ b/backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -3,7 +3,6 @@ from collections.abc import Callable
 from collections.abc import Generator
 from typing import Any
 from typing import cast
-from typing import TypeVar

 from sqlalchemy.orm import Session

@@ -12,6 +11,7 @@ from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import ContextualPruningConfig
 from onyx.chat.models import DocumentPruningConfig
 from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContext
 from onyx.chat.models import OnyxContexts
 from onyx.chat.models import PromptConfig
 from onyx.chat.models import SectionRelevancePiece
@@ -42,9 +42,6 @@ from onyx.tools.models import SearchQueryInfo
 from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.models import ToolResponse
 from onyx.tools.tool import Tool
-from onyx.tools.tool_implementations.search.search_utils import (
-    context_from_inference_section,
-)
 from onyx.tools.tool_implementations.search.search_utils import llm_doc_to_dict
 from onyx.tools.tool_implementations.search_like_tool_utils import (
    build_next_prompt_for_search_like_tool,
@@ -284,23 +281,16 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
        self, override_kwargs: SearchToolOverrideKwargs | None = None, **llm_kwargs: Any
    ) -> Generator[ToolResponse, None, None]:
        query = cast(str, llm_kwargs[QUERY_FIELD])
-        precomputed_query_embedding = None
-        precomputed_is_keyword = None
-        precomputed_keywords = None
        force_no_rerank = False
        alternate_db_session = None
        retrieved_sections_callback = None
        skip_query_analysis = False
        if override_kwargs:
-            force_no_rerank = use_alt_not_None(override_kwargs.force_no_rerank, False)
+            force_no_rerank = override_kwargs.force_no_rerank
            alternate_db_session = override_kwargs.alternate_db_session
            retrieved_sections_callback = override_kwargs.retrieved_sections_callback
-            skip_query_analysis = use_alt_not_None(
-                override_kwargs.skip_query_analysis, False
-            )
-            precomputed_query_embedding = override_kwargs.precomputed_query_embedding
-            precomputed_is_keyword = override_kwargs.precomputed_is_keyword
-            precomputed_keywords = override_kwargs.precomputed_keywords
+            skip_query_analysis = override_kwargs.skip_query_analysis
+
        if self.selected_sections:
            yield from self._build_response_for_specified_sections(query)
            return
@@ -337,9 +327,6 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
                    if self.retrieval_options
                    else None
                ),
-                precomputed_query_embedding=precomputed_query_embedding,
-                precomputed_is_keyword=precomputed_is_keyword,
-                precomputed_keywords=precomputed_keywords,
            ),
            user=self.user,
            llm=self.llm,
@@ -358,9 +345,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
        )
        yield from yield_search_responses(
            query,
-            lambda: search_pipeline.retrieved_sections,
-            lambda: search_pipeline.reranked_sections,
-            lambda: search_pipeline.final_context_sections,
+            search_pipeline.reranked_sections,
+            search_pipeline.final_context_sections,
            search_query_info,
            lambda: search_pipeline.section_relevance,
            self,
@@ -397,16 +383,10 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
 # SearchTool passed in to allow for access to SearchTool properties.
 # We can't just call SearchTool methods in the graph because we're operating on
 # the retrieved docs (reranking, deduping, etc.) after the SearchTool has run.
-#
-# The various inference sections are passed in as functions to allow for lazy
-# evaluation. The SearchPipeline object properties that they correspond to are
-# actually functions defined with @property decorators, and passing them into
-# this function causes them to get evaluated immediately which is undesirable.
 def yield_search_responses(
    query: str,
-    get_retrieved_sections: Callable[[], list[InferenceSection]],
-    get_reranked_sections: Callable[[], list[InferenceSection]],
-    get_final_context_sections: Callable[[], list[InferenceSection]],
+    reranked_sections: list[InferenceSection],
+    final_context_sections: list[InferenceSection],
    search_query_info: SearchQueryInfo,
    get_section_relevance: Callable[[], list[SectionRelevancePiece] | None],
    search_tool: SearchTool,
@@ -415,7 +395,7 @@ def yield_search_responses(
        id=SEARCH_RESPONSE_SUMMARY_ID,
        response=SearchResponseSummary(
            rephrased_query=query,
-            top_sections=get_retrieved_sections(),
+            top_sections=final_context_sections,
            predicted_flow=QueryFlow.QUESTION_ANSWER,
            predicted_search=search_query_info.predicted_search,
            final_filters=search_query_info.final_filters,
@@ -427,8 +407,13 @@ def yield_search_responses(
        id=SEARCH_DOC_CONTENT_ID,
        response=OnyxContexts(
            contexts=[
-                context_from_inference_section(section)
-                for section in get_reranked_sections()
+                OnyxContext(
+                    content=section.combined_content,
+                    document_id=section.center_chunk.document_id,
+                    semantic_identifier=section.center_chunk.semantic_identifier,
+                    blurb=section.center_chunk.blurb,
+                )
+                for section in reranked_sections
            ]
        ),
    )
@@ -439,7 +424,6 @@ def yield_search_responses(
        response=section_relevance,
    )

-    final_context_sections = get_final_context_sections()
    pruned_sections = prune_sections(
        sections=final_context_sections,
        section_relevance_list=section_relevance_list_impl(
@@ -454,10 +438,3 @@ def yield_search_responses(
    llm_docs = [llm_doc_from_inference_section(section) for section in pruned_sections]

    yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS_ID, response=llm_docs)
-
-
-T = TypeVar("T")
-
-
-def use_alt_not_None(value: T | None, alt: T) -> T:
-    return value if value is not None else alt
--- a/backend/onyx/tools/tool_implementations/search/search_utils.py
+++ b/backend/onyx/tools/tool_implementations/search/search_utils.py
@@ -1,5 +1,4 @@
 from onyx.chat.models import LlmDoc
-from onyx.chat.models import OnyxContext
 from onyx.context.search.models import InferenceSection
 from onyx.prompts.prompt_utils import clean_up_source

@@ -30,12 +29,3 @@ def section_to_dict(section: InferenceSection, section_num: int) -> dict:
            "%B %d, %Y %H:%M"
        )
    return doc_dict
-
-
-def context_from_inference_section(section: InferenceSection) -> OnyxContext:
-    return OnyxContext(
-        content=section.combined_content,
-        document_id=section.center_chunk.document_id,
-        semantic_identifier=section.center_chunk.semantic_identifier,
-        blurb=section.center_chunk.blurb,
-    )
--- a/backend/onyx/tools/tool_runner.py
+++ b/backend/onyx/tools/tool_runner.py
@@ -1,8 +1,6 @@
 from collections.abc import Callable
 from collections.abc import Generator
 from typing import Any
-from typing import Generic
-from typing import TypeVar

 from onyx.llm.interfaces import LLM
 from onyx.llm.models import PreviousMessage
@@ -13,16 +11,10 @@ from onyx.tools.tool import Tool
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel


-R = TypeVar("R")
-
-
-class ToolRunner(Generic[R]):
-    def __init__(
-        self, tool: Tool[R], args: dict[str, Any], override_kwargs: R | None = None
-    ):
+class ToolRunner:
+    def __init__(self, tool: Tool, args: dict[str, Any]):
        self.tool = tool
        self.args = args
-        self.override_kwargs = override_kwargs

        self._tool_responses: list[ToolResponse] | None = None

@@ -35,9 +27,7 @@ class ToolRunner(Generic[R]):
            return

        tool_responses: list[ToolResponse] = []
-        for tool_response in self.tool.run(
-            override_kwargs=self.override_kwargs, **self.args
-        ):
+        for tool_response in self.tool.run(**self.args):
            yield tool_response
            tool_responses.append(tool_response)

--- a/backend/onyx/utils/error_handling.py
+++ b/backend/onyx/utils/error_handling.py
@@ -1,23 +0,0 @@
-"""
-Standardized error handling utilities.
-"""
-from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def handle_connector_error(e: Exception, context: str) -> None:
-    """
-    Standard error handling for connectors.
-
-    Args:
-        e: The exception that was raised
-        context: A description of where the error occurred
-
-    Raises:
-        The original exception if CONTINUE_ON_CONNECTOR_FAILURE is False
-    """
-    logger.error(f"Error in {context}: {e}", exc_info=e)
-    if not CONTINUE_ON_CONNECTOR_FAILURE:
-        raise
--- a/backend/onyx/utils/threadpool_concurrency.py
+++ b/backend/onyx/utils/threadpool_concurrency.py
@@ -118,7 +118,7 @@ def run_functions_in_parallel(
    return results


-class TimeoutThread(threading.Thread, Generic[R]):
+class TimeoutThread(threading.Thread):
    def __init__(
        self, timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
    ):
@@ -159,34 +159,3 @@ def run_with_timeout(
        task.end()

    return task.result
-
-
-# NOTE: this function should really only be used when run_functions_tuples_in_parallel is
-# difficult to use. It's up to the programmer to call wait_on_background on the thread after
-# the code you want to run in parallel is finished. As with all python thread parallelism,
-# this is only useful for I/O bound tasks.
-def run_in_background(
-    func: Callable[..., R], *args: Any, **kwargs: Any
-) -> TimeoutThread[R]:
-    """
-    Runs a function in a background thread. Returns a TimeoutThread object that can be used
-    to wait for the function to finish with wait_on_background.
-    """
-    context = contextvars.copy_context()
-    # Timeout not used in the non-blocking case
-    task = TimeoutThread(-1, context.run, func, *args, **kwargs)
-    task.start()
-    return task
-
-
-def wait_on_background(task: TimeoutThread[R]) -> R:
-    """
-    Used in conjunction with run_in_background. blocks until the task is finished,
-    then returns the result of the task.
-    """
-    task.join()
-
-    if task.exception is not None:
-        raise task.exception
-
-    return task.result
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -1,10 +1,9 @@
-aioboto3==14.0.0
 aiohttp==3.10.2
 alembic==1.10.4
 asyncpg==0.27.0
 atlassian-python-api==3.41.16
 beautifulsoup4==4.12.3
-boto3==1.36.23
+boto3==1.34.84
 celery==5.5.0b4
 chardet==5.2.0
 dask==2023.8.1
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -13,5 +13,4 @@ transformers==4.39.2
 uvicorn==0.21.1
 voyageai==0.2.3
 litellm==1.61.16
-sentry-sdk[fastapi,celery,starlette]==2.14.0
-aioboto3==13.4.0
+sentry-sdk[fastapi,celery,starlette]==2.14.0
--- a/backend/scripts/chat_history_seeding.py
+++ b/backend/scripts/chat_history_seeding.py
@@ -1,45 +0,0 @@
-import argparse
-import logging
-from logging import getLogger
-
-from onyx.db.seeding.chat_history_seeding import seed_chat_history
-
-# Configure the logger
-logging.basicConfig(
-    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
-    handlers=[logging.StreamHandler()],  # Output logs to console
-)
-
-logger = getLogger(__name__)
-
-
-def go_main(num_sessions: int, num_messages: int, num_days: int) -> None:
-    seed_chat_history(num_sessions, num_messages, num_days)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Seed chat history")
-    parser.add_argument(
-        "--sessions",
-        type=int,
-        default=2048,
-        help="Number of chat sessions to seed",
-    )
-
-    parser.add_argument(
-        "--messages",
-        type=int,
-        default=4,
-        help="Number of chat messages to seed per session",
-    )
-
-    parser.add_argument(
-        "--days",
-        type=int,
-        default=90,
-        help="Number of days looking backwards over which to seed the timestamps with",
-    )
-
-    args = parser.parse_args()
-    go_main(args.sessions, args.messages, args.days)
--- a/backend/scripts/debugging/onyx_vespa.py
+++ b/backend/scripts/debugging/onyx_vespa.py
@@ -207,7 +207,7 @@ def query_vespa(
    yql: str, tenant_id: Optional[str] = None, limit: int = 10
 ) -> List[Dict[str, Any]]:
    # Perform a Vespa query using YQL syntax.
-    filters = IndexFilters(tenant_id=None, access_control_list=[])
+    filters = IndexFilters(tenant_id=tenant_id, access_control_list=[])
    filter_string = build_vespa_filters(filters, remove_trailing_and=True)
    full_yql = yql.strip()
    if filter_string:
@@ -472,7 +472,9 @@ def get_document_acls(
            print("-" * 80)


-def get_current_chunk_count(document_id: str) -> int | None:
+def get_current_chunk_count(
+    document_id: str, index_name: str, tenant_id: str
+) -> int | None:
    with get_session_with_current_tenant() as session:
        return (
            session.query(Document.chunk_count)
@@ -484,7 +486,7 @@ def get_current_chunk_count(document_id: str) -> int | None:
 def get_number_of_chunks_we_think_exist(
    document_id: str, index_name: str, tenant_id: str
 ) -> int:
-    current_chunk_count = get_current_chunk_count(document_id)
+    current_chunk_count = get_current_chunk_count(document_id, index_name, tenant_id)
    print(f"Current chunk count: {current_chunk_count}")

    doc_info = VespaIndex.enrich_basic_chunk_info(
@@ -634,7 +636,6 @@ def delete_where(
    Removes visited documents in `cluster` where the given selection
    is true, using Vespa's 'delete where' endpoint.

-
    :param index_name: Typically <namespace>/<document-type> from your schema
    :param selection:  The selection string, e.g., "true" or "foo contains 'bar'"
    :param cluster:    The name of the cluster where documents reside
@@ -798,7 +799,7 @@ def main() -> None:
    args = parser.parse_args()
    vespa_debug = VespaDebugging(args.tenant_id)

-    CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id or "public")
+    CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id)
    if args.action == "delete-all-documents":
        if not args.tenant_id:
            parser.error("--tenant-id is required for delete-all-documents action")
--- a/backend/scripts/query_time_check/seed_dummy_docs.py
+++ b/backend/scripts/query_time_check/seed_dummy_docs.py
@@ -71,7 +71,6 @@ def generate_dummy_chunk(
        title_embedding=generate_random_embedding(embedding_dim),
        large_chunk_id=None,
        large_chunk_reference_ids=[],
-        image_file_name=None,
    )

    document_set_names = []
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -68,12 +68,6 @@ LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
 # allow us to specify a custom timeout
 API_BASED_EMBEDDING_TIMEOUT = int(os.environ.get("API_BASED_EMBEDDING_TIMEOUT", "600"))

-# Local batch size for VertexAI embedding models currently calibrated for item size of 512 tokens
-# NOTE: increasing this value may lead to API errors due to token limit exhaustion per call.
-VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE = int(
-    os.environ.get("VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE", "25")
-)
-
 # Only used for OpenAI
 OPENAI_EMBEDDING_TIMEOUT = int(
    os.environ.get("OPENAI_EMBEDDING_TIMEOUT", API_BASED_EMBEDDING_TIMEOUT)
@@ -206,12 +200,12 @@ SUPPORTED_EMBEDDING_MODELS = [
        index_name="danswer_chunk_text_embedding_3_small",
    ),
    SupportedEmbeddingModel(
-        name="google/text-embedding-005",
+        name="google/text-embedding-004",
        dim=768,
        index_name="danswer_chunk_google_text_embedding_004",
    ),
    SupportedEmbeddingModel(
-        name="google/text-embedding-005",
+        name="google/text-embedding-004",
        dim=768,
        index_name="danswer_chunk_text_embedding_004",
    ),
--- a/backend/shared_configs/enums.py
+++ b/backend/shared_configs/enums.py
@@ -13,7 +13,6 @@ class EmbeddingProvider(str, Enum):
 class RerankerProvider(str, Enum):
    COHERE = "cohere"
    LITELLM = "litellm"
-    BEDROCK = "bedrock"


 class EmbedTextType(str, Enum):
--- a/backend/supervisord.conf
+++ b/backend/supervisord.conf
@@ -108,7 +108,6 @@ command=tail -qF
    /var/log/celery_worker_light.log
    /var/log/celery_worker_heavy.log
    /var/log/celery_worker_indexing.log
-    /var/log/celery_worker_monitoring.log
    /var/log/slack_bot.log
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes = 0  # must be set to 0 when stdout_logfile=/dev/stdout
--- a/backend/tests/daily/connectors/confluence/test_confluence_basic.py
+++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py
@@ -45,7 +45,7 @@ def test_confluence_connector_basic(
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

-    assert len(doc_batch) == 2
+    assert len(doc_batch) == 3

    page_within_a_page_doc: Document | None = None
    page_doc: Document | None = None
--- a/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py
+++ b/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py
@@ -41,10 +41,5 @@ def test_confluence_connector_permissions(
    for slim_doc_batch in confluence_connector.retrieve_all_slim_documents():
        all_slim_doc_ids.update([doc.id for doc in slim_doc_batch])

-    # Find IDs that are in full but not in slim
-    difference = all_full_doc_ids - all_slim_doc_ids
-
    # The set of full doc IDs should be always be a subset of the slim doc IDs
-    assert all_full_doc_ids.issubset(
-        all_slim_doc_ids
-    ), f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."
+    assert all_full_doc_ids.issubset(all_slim_doc_ids)
--- a/backend/tests/integration/common_utils/reset.py
+++ b/backend/tests/integration/common_utils/reset.py
@@ -25,7 +25,7 @@ from onyx.indexing.models import IndexingSetting
 from onyx.setup import setup_postgres
 from onyx.setup import setup_vespa
 from onyx.utils.logger import setup_logger
-from tests.integration.common_utils.timeout import run_with_timeout_multiproc
+from tests.integration.common_utils.timeout import run_with_timeout

 logger = setup_logger()

@@ -161,7 +161,7 @@ def reset_postgres(
    for _ in range(NUM_TRIES):
        logger.info(f"Downgrading Postgres... ({_ + 1}/{NUM_TRIES})")
        try:
-            run_with_timeout_multiproc(
+            run_with_timeout(
                downgrade_postgres,
                TIMEOUT,
                kwargs={
--- a/backend/tests/integration/common_utils/timeout.py
+++ b/backend/tests/integration/common_utils/timeout.py
@@ -6,9 +6,7 @@ from typing import TypeVar
 T = TypeVar("T")


-def run_with_timeout_multiproc(
-    task: Callable[..., T], timeout: int, kwargs: dict[str, Any]
-) -> T:
+def run_with_timeout(task: Callable[..., T], timeout: int, kwargs: dict[str, Any]) -> T:
    # Use multiprocessing to prevent a thread from blocking the main thread
    with multiprocessing.Pool(processes=1) as pool:
        async_result = pool.apply_async(task, kwds=kwargs)
--- a/backend/tests/integration/tests/query_history/test_usage_reports.py
+++ b/backend/tests/integration/tests/query_history/test_usage_reports.py
@@ -1,48 +0,0 @@
-from datetime import datetime
-from datetime import timedelta
-from datetime import timezone
-
-from ee.onyx.db.usage_export import get_all_empty_chat_message_entries
-from onyx.db.engine import get_session_with_current_tenant
-from onyx.db.seeding.chat_history_seeding import seed_chat_history
-
-
-def test_usage_reports(reset: None) -> None:
-    EXPECTED_SESSIONS = 2048
-    MESSAGES_PER_SESSION = 4
-
-    # divide by 2 because only messages of type USER are returned
-    EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION / 2
-
-    seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)
-
-    with get_session_with_current_tenant() as db_session:
-        # count of all entries should be exact
-        period = (
-            datetime.fromtimestamp(0, tz=timezone.utc),
-            datetime.now(tz=timezone.utc),
-        )
-
-        count = 0
-        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
-            for entry in entry_batch:
-                count += 1
-
-        assert count == EXPECTED_MESSAGES
-
-        # count in a one month time range should be within a certain range statistically
-        # this can be improved if we seed the chat history data deterministically
-        period = (
-            datetime.now(tz=timezone.utc) - timedelta(days=30),
-            datetime.now(tz=timezone.utc),
-        )
-
-        count = 0
-        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
-            for entry in entry_batch:
-                count += 1
-
-        lower = EXPECTED_MESSAGES // 3 - (EXPECTED_MESSAGES // (3 * 3))
-        upper = EXPECTED_MESSAGES // 3 + (EXPECTED_MESSAGES // (3 * 3))
-        assert count > lower
-        assert count < upper
--- a/Show More
+++ b/Show More